linux/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c

/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */
#include "display_mode_vba_util_32.h"
#include "../dml_inline_defs.h"
#include "display_mode_vba_32.h"
#include "../display_mode_lib.h"

/*
 * NOTE(review): presumably the widest line, in pixels, that the FMT block can
 * buffer for 4:2:0 output on DCN32 -- inferred from the name only; the macro
 * is not referenced in the code visible here. TODO confirm.
 */
#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096

/*
 * dml32_dscceComputeDelay - delay through the DSC encoder core, in pixels.
 *
 * Valid inputs, as documented for this model:
 *   bpc         source bits per component, in the set {8, 10, 12}
 *   BPP         compressed bits per pixel in increments of 1/16 of a bit;
 *               min = 6/7/8 in N420/N422/444 respectively,
 *               max = such that compression is 1:1
 *   sliceWidth  number of pixels per slice line, at most 5184/numSlices
 *               (or 4096/numSlices in 420 mode)
 *   numSlices   number of slices in the horizontal direction per DSC engine,
 *               in the set {1, 2, 3, 4}
 *   pixelFormat pixel/color format in the set {N444_RGB, S422, N422, N420}
 *   Output      only echoed in the debug trace
 *
 * None of these ranges is checked here; in particular a sliceWidth smaller
 * than the pixels-per-clock factor makes the per-engine slice width zero,
 * and that value is used as a divisor below.
 *
 * Return: the encoder delay expressed in pixels.
 */
unsigned int dml32_dscceComputeDelay(
		unsigned int bpc,
		double BPP,
		unsigned int sliceWidth,
		unsigned int numSlices,
		enum output_format_class pixelFormat,
		enum output_encoder_class Output)
{
	/* fixed rate-control model size */
	const unsigned int rc_model_size = 8192;

	unsigned int px_per_clk;
	unsigned int init_xmit_delay;
	unsigned int ssm_delay;
	unsigned int slice_w;
	unsigned int extra_cycle;
	unsigned int stall;
	unsigned int ix, wx, pad, full_lines, a, ax, L;
	unsigned int cycles;
	unsigned int pixels;

	/* N420 and N422 move two pixels per clock, everything else one */
	px_per_clk = (pixelFormat == dm_420 || pixelFormat == dm_n422) ? 2 : 1;

	/* initial transmit delay as per PPS */
	init_xmit_delay = dml_round(rc_model_size / 2.0 / BPP / px_per_clk);

	/* substream multiplexer delay depends on the component depth */
	switch (bpc) {
	case 8:
		ssm_delay = 81;
		break;
	case 10:
		ssm_delay = 89;
		break;
	default:
		ssm_delay = 113;
		break;
	}

	/* slice width as seen by the DSC engine (in pixel containers) */
	slice_w = sliceWidth / px_per_clk;

	/* formats other than 420, 444 and N422 pay one additional cycle */
	extra_cycle = (pixelFormat == dm_420 || pixelFormat == dm_444 ||
			pixelFormat == dm_n422) ? 0 : 1;

	/* main calculation for the dscce */
	ix = init_xmit_delay + 45;
	wx = (slice_w + 2) / 3;		/* slice width rounded up to groups of 3 */
	pad = 3 * wx - slice_w;		/* padding added by that rounding */
	full_lines = ix / slice_w;
	a = ix + pad * full_lines;
	ax = (a + 2) / 3 + ssm_delay + 6 + 1;
	L = (ax + wx - 1) / wx;
	stall = ((ix % slice_w) == 0 && pad != 0) ? 1 : 0;
	cycles = L * wx * (numSlices - 1) + ax + extra_cycle + stall + 22;

	/* dsc processes 3 pixel containers per cycle; a container holds 1 or 2 pixels */
	pixels = cycles * 3 * px_per_clk;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
	dml_print("DML::%s: Output: %d\n", __func__, Output);
	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
#endif

	return pixels;
}

/*
 * dml32_dscComputeDelay - fixed delay of the DSC block stages surrounding the
 * encoder core, obtained by summing one constant per stage.
 *
 * pixelFormat selects one of three stage tables: dm_420, dm_444 (unless it is
 * also dm_n422), and every remaining format. Output is accepted for
 * interface compatibility and is not used.
 *
 * Return: the summed delay (the unit is not stated in this function).
 */
unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
{
	unsigned int Delay;

	if (pixelFormat == dm_420) {
		Delay = 2	/* sfr */
			+ 0	/* dsccif */
			+ 3	/* dscc - input deserializer */
			+ 2	/* dscc gets pixels every other cycle */
			+ 12	/* dscc - input cdc fifo */
			+ 13	/* dscc gets pixels every other cycle */
			+ 2	/* dscc - cdc uncertainty */
			+ 7	/* dscc - output cdc fifo */
			+ 3	/* dscc gets pixels every other cycle */
			+ 2	/* dscc - cdc uncertainty */
			+ 1	/* dscc - output serializer */
			+ 1;	/* sft */
	} else if (pixelFormat != dm_n422 && pixelFormat == dm_444) {
		Delay = 2	/* sfr */
			+ 0	/* dsccif */
			+ 3	/* dscc - input deserializer */
			+ 12	/* dscc - input cdc fifo */
			+ 2	/* dscc - cdc uncertainty */
			+ 7	/* dscc - output cdc fifo */
			+ 1	/* dscc - output serializer */
			+ 2	/* dscc - cdc uncertainty */
			+ 1;	/* sft */
	} else {
		/* dm_n422 and every other format that is not dm_444 */
		Delay = 2	/* sfr */
			+ 1	/* dsccif */
			+ 5	/* dscc - input deserializer */
			+ 25	/* dscc - input cdc fifo */
			+ 2	/* dscc - cdc uncertainty */
			+ 10	/* dscc - output cdc fifo */
			+ 2	/* dscc - cdc uncertainty */
			+ 1	/* dscc - output serializer */
			+ 1;	/* sft */
	}

	return Delay;
}


/*
 * IsVertical - report whether a scan direction is one of the quarter-turn
 * rotations.
 *
 * Return: true for dm_rotation_90, dm_rotation_90m, dm_rotation_270 and
 * dm_rotation_270m; false for every other value.
 */
bool IsVertical(enum dm_rotation_angle Scan)
{
	return Scan == dm_rotation_90 || Scan == dm_rotation_90m ||
			Scan == dm_rotation_270 || Scan == dm_rotation_270m;
}

/*
 * dml32_CalculateSinglePipeDPPCLKAndSCLThroughput - scaler throughput and the
 * DPP clock required when one DPP handles the whole surface.
 *
 * For luma, and for chroma when the source format is dm_420_8, dm_420_10,
 * dm_420_12 or dm_rgbe_alpha:
 *   - throughput is min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput),
 *     with the second term scaled by HRatio / ceil(HTaps / 6) when the
 *     horizontal ratio is above 1;
 *   - the required clock is PixelClock times the largest of the vertical-tap
 *     term, HRatio * VRatio / throughput, and 1;
 *   - more than 6 taps in either direction forces at least 2 * PixelClock.
 *
 * Outputs:
 *   PSCL_THROUGHPUT         luma throughput
 *   PSCL_THROUGHPUT_CHROMA  chroma throughput, 0 for formats without chroma
 *   DPPCLKUsingSingleDPP    larger of the luma and chroma clock requirements
 *                           (luma only for formats without chroma)
 */
void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
		double HRatio,
		double HRatioChroma,
		double VRatio,
		double VRatioChroma,
		double MaxDCHUBToPSCLThroughput,
		double MaxPSCLToLBThroughput,
		double PixelClock,
		enum source_format_class SourcePixelFormat,
		unsigned int HTaps,
		unsigned int HTapsChroma,
		unsigned int VTaps,
		unsigned int VTapsChroma,

		/* output */
		double *PSCL_THROUGHPUT,
		double *PSCL_THROUGHPUT_CHROMA,
		double *DPPCLKUsingSingleDPP)
{
	double luma_lb_limit = MaxPSCLToLBThroughput;
	double chroma_lb_limit = MaxPSCLToLBThroughput;
	double luma_dppclk;
	double chroma_dppclk;

	/* Luma: downscaling raises the line-buffer limit by HRatio per 6-tap group */
	if (HRatio > 1)
		luma_lb_limit = MaxPSCLToLBThroughput * HRatio /
				dml_ceil((double) HTaps / 6.0, 1.0);
	*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, luma_lb_limit);

	/*
	 * VTaps / 6 is an unsigned integer division and therefore truncates.
	 * NOTE(review): confirm that truncation (rather than VTaps / 6.0) is
	 * what the model intends.
	 */
	luma_dppclk = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio),
			HRatio * VRatio / *PSCL_THROUGHPUT, 1);

	if ((HTaps > 6 || VTaps > 6) && luma_dppclk < 2 * PixelClock)
		luma_dppclk = 2 * PixelClock;

	/* Formats without a chroma plane: luma alone decides the clock */
	if (SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 &&
			SourcePixelFormat != dm_420_12 && SourcePixelFormat != dm_rgbe_alpha) {
		*PSCL_THROUGHPUT_CHROMA = 0;
		*DPPCLKUsingSingleDPP = luma_dppclk;
		return;
	}

	/* Chroma: same computation with the chroma ratios and taps */
	if (HRatioChroma > 1)
		chroma_lb_limit = MaxPSCLToLBThroughput * HRatioChroma /
				dml_ceil((double) HTapsChroma / 6.0, 1.0);
	*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, chroma_lb_limit);

	/* VTapsChroma / 6 truncates as well, see the note on the luma term */
	chroma_dppclk = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
			HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);

	if ((HTapsChroma > 6 || VTapsChroma > 6) && chroma_dppclk < 2 * PixelClock)
		chroma_dppclk = 2 * PixelClock;

	*DPPCLKUsingSingleDPP = dml_max(luma_dppclk, chroma_dppclk);
}

/*
 * dml32_CalculateBytePerPixelAndBlockSizes - per-plane pixel size and the
 * dimensions of the 256-byte request block and of the macro tile.
 *
 * Inputs:
 *   SourcePixelFormat  surface pixel format
 *   SurfaceTiling      swizzle mode; dm_sw_linear gives 1-line-high blocks
 *
 * Outputs (plane 0 is "Y", plane 1 is "C"; C values are 0 when the format is
 * handled as single plane):
 *   BytePerPixelY/C         integer bytes per pixel
 *   BytePerPixelDETY/C      bytes per pixel as a double; equal to the integer
 *                           value except in the fallback case (4/3 and 8/3)
 *   BlockHeight256BytesY/C  height of a 256-byte block
 *   BlockWidth256BytesY/C   256 / bytes per pixel / block height
 *   MacroTileHeightY/C      block height times a per-swizzle factor
 *   MacroTileWidthY/C       tile size in bytes / bytes per pixel / tile height
 */
void dml32_CalculateBytePerPixelAndBlockSizes(
		enum source_format_class SourcePixelFormat,
		enum dm_swizzle_mode SurfaceTiling,

		/* Output */
		unsigned int *BytePerPixelY,
		unsigned int *BytePerPixelC,
		double  *BytePerPixelDETY,
		double  *BytePerPixelDETC,
		unsigned int *BlockHeight256BytesY,
		unsigned int *BlockHeight256BytesC,
		unsigned int *BlockWidth256BytesY,
		unsigned int *BlockWidth256BytesC,
		unsigned int *MacroTileHeightY,
		unsigned int *MacroTileHeightC,
		unsigned int *MacroTileWidthY,
		unsigned int *MacroTileWidthC)
{
	unsigned int bytes_y;
	unsigned int bytes_c;
	unsigned int blk_height_y;
	unsigned int blk_height_c;
	unsigned int tile_bytes;
	unsigned int tile_height_factor;
	int fractional_det = 0;

	/* Step 1: bytes per pixel for each plane */
	if (SourcePixelFormat == dm_444_64) {
		bytes_y = 8;
		bytes_c = 0;
	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
		bytes_y = 4;
		bytes_c = 0;
	} else if (SourcePixelFormat == dm_444_16) {
		bytes_y = 2;
		bytes_c = 0;
	} else if (SourcePixelFormat == dm_444_8) {
		bytes_y = 1;
		bytes_c = 0;
	} else if (SourcePixelFormat == dm_rgbe_alpha) {
		bytes_y = 4;
		bytes_c = 1;
	} else if (SourcePixelFormat == dm_420_8) {
		bytes_y = 1;
		bytes_c = 2;
	} else if (SourcePixelFormat == dm_420_12) {
		bytes_y = 2;
		bytes_c = 4;
	} else {
		/*
		 * Every remaining format (presumably dm_420_10 -- TODO confirm):
		 * the DET sizes are fractional, the integer sizes are rounded up.
		 */
		bytes_y = 2;
		bytes_c = 4;
		fractional_det = 1;
	}

	*BytePerPixelDETY = fractional_det ? 4.0 / 3 : bytes_y;
	*BytePerPixelDETC = fractional_det ? 8.0 / 3 : bytes_c;
	*BytePerPixelY = bytes_y;
	*BytePerPixelC = bytes_c;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
#endif

	/* Step 2: shape of a 256-byte block */
	if (SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
			|| SourcePixelFormat == dm_444_16
			|| SourcePixelFormat == dm_444_8
			|| SourcePixelFormat == dm_mono_16
			|| SourcePixelFormat == dm_mono_8
			|| SourcePixelFormat == dm_rgbe) {
		/* formats handled as a single plane: no chroma block */
		if (SurfaceTiling == dm_sw_linear)
			blk_height_y = 1;
		else if (SourcePixelFormat == dm_444_64)
			blk_height_y = 4;
		else if (SourcePixelFormat == dm_444_8)
			blk_height_y = 16;
		else
			blk_height_y = 8;

		*BlockHeight256BytesY = blk_height_y;
		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
		*BlockHeight256BytesC = 0;
		*BlockWidth256BytesC = 0;
	} else {
		/* formats with a second plane */
		if (SurfaceTiling == dm_sw_linear) {
			blk_height_y = 1;
			blk_height_c = 1;
		} else if (SourcePixelFormat == dm_rgbe_alpha) {
			blk_height_y = 8;
			blk_height_c = 16;
		} else if (SourcePixelFormat == dm_420_8) {
			blk_height_y = 16;
			blk_height_c = 8;
		} else {
			blk_height_y = 8;
			blk_height_c = 8;
		}

		*BlockHeight256BytesY = blk_height_y;
		*BlockHeight256BytesC = blk_height_c;
		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
#endif

	/*
	 * Step 3: macro tile. The swizzle mode fixes the tile size in bytes and
	 * how many 256-byte blocks are stacked vertically.
	 */
	if (SurfaceTiling == dm_sw_linear) {
		tile_bytes = 256;
		tile_height_factor = 1;
	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
		tile_bytes = 65536;
		tile_height_factor = 16;
	} else {
		tile_bytes = 65536 * 4;
		tile_height_factor = 32;
	}

	*MacroTileHeightY = tile_height_factor * *BlockHeight256BytesY;
	*MacroTileWidthY = tile_bytes / *BytePerPixelY / *MacroTileHeightY;
	*MacroTileHeightC = tile_height_factor * *BlockHeight256BytesC;
	/* a zero chroma height means there is no chroma plane: avoid dividing by it */
	if (*MacroTileHeightC == 0)
		*MacroTileWidthC = 0;
	else
		*MacroTileWidthC = tile_bytes / *BytePerPixelC / *MacroTileHeightC;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
#endif
} // CalculateBytePerPixelAndBlockSizes

/*
 * dml32_CalculateSwathAndDETConfiguration - choose swath heights and split the
 * DET buffer between the luma and chroma planes of every active surface.
 *
 * Steps, in order:
 *  1. dml32_CalculateSwathWidth() provides per-surface swath widths, their
 *     block-aligned upper bounds and the maximum swath heights.
 *  2. The maximum swath size in bytes of each plane is
 *     upper-bound width * BytePerPixDET * maximum swath height, rounded up to
 *     a multiple of 256 for dm_420_10 surfaces.
 *  3. The active DPPs are counted (one per surface when ForceSingleDPP is
 *     non-zero) and the presence of any chroma-carrying format is recorded.
 *  4. The compressed buffer reserved space defaults to two pixel chunks and
 *     is replaced by ROBSizeKBytes - swath0 / 512 when that leaves more room.
 *  5. dml32_UnboundedRequest() and dml32_CalculateDETBufferSize() decide on
 *     unbounded requesting and the per-surface DET size.
 *  6. For each surface the full or halved swath heights are picked so that
 *     luma + chroma fit into half of the DET, the viewport support flags are
 *     set, and the DET is divided between the two planes.
 *
 * Steps 4 and 5 only look at surface 0 (RoundedUpMaxSwathSizeBytesY[0],
 * Output[0], SurfaceTiling[0]).
 * NOTE(review): with NumberOfActiveSurfaces == 0 the local
 * RoundedUpMaxSwathSizeBytesY[0] is read without having been written;
 * presumably callers always pass at least one surface -- confirm.
 *
 * Outputs are the arrays and pointers listed after the "Output" marker in the
 * parameter list; per-surface arrays are written for indices
 * 0 .. NumberOfActiveSurfaces - 1.
 */
void dml32_CalculateSwathAndDETConfiguration(
		unsigned int DETSizeOverride[],
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int MaxTotalDETInKByte,
		unsigned int MinCompressedBufferSizeInKByte,
		double ForceSingleDPP,
		unsigned int NumberOfActiveSurfaces,
		unsigned int nomDETInKByte,
		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
		unsigned int PixelChunkSizeKBytes,
		unsigned int ROBSizeKBytes,
		unsigned int CompressedBufferSegmentSizeInkByteFinal,
		enum output_encoder_class Output[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double MaximumSwathWidthLuma[],
		double MaximumSwathWidthChroma[],
		enum dm_rotation_angle SourceRotation[],
		bool ViewportStationary[],
		enum source_format_class SourcePixelFormat[],
		enum dm_swizzle_mode SurfaceTiling[],
		unsigned int ViewportWidth[],
		unsigned int ViewportHeight[],
		unsigned int ViewportXStart[],
		unsigned int ViewportYStart[],
		unsigned int ViewportXStartC[],
		unsigned int ViewportYStartC[],
		unsigned int SurfaceWidthY[],
		unsigned int SurfaceWidthC[],
		unsigned int SurfaceHeightY[],
		unsigned int SurfaceHeightC[],
		unsigned int Read256BytesBlockHeightY[],
		unsigned int Read256BytesBlockHeightC[],
		unsigned int Read256BytesBlockWidthY[],
		unsigned int Read256BytesBlockWidthC[],
		enum odm_combine_mode ODMMode[],
		unsigned int BlendingAndTiming[],
		unsigned int BytePerPixY[],
		unsigned int BytePerPixC[],
		double BytePerPixDETY[],
		double BytePerPixDETC[],
		unsigned int HActive[],
		double HRatio[],
		double HRatioChroma[],
		unsigned int DPPPerSurface[],

		/* Output */
		unsigned int swath_width_luma_ub[],
		unsigned int swath_width_chroma_ub[],
		double SwathWidth[],
		double SwathWidthChroma[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		unsigned int DETBufferSizeInKByte[],
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		bool *UnboundedRequestEnabled,
		unsigned int *CompressedBufferSizeInkByte,
		unsigned int *CompBufReservedSpaceKBytes,
		bool *CompBufReservedSpaceNeedAdjustment,
		bool ViewportSizeSupportPerSurface[],
		bool *ViewportSizeSupport)
{
	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
	unsigned int RoundedUpSwathSizeBytesY;
	unsigned int RoundedUpSwathSizeBytesC;
	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
	unsigned int k;
	unsigned int TotalActiveDPP = 0;
	bool NoChromaSurfaces = true;
	unsigned int DETBufferSizeInKByteForSwathCalculation;

#ifdef __DML_VBA_DEBUG__
	/* ForceSingleDPP is a double used as a flag: print its truth value so the argument matches %d */
	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP != 0);
	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
#endif
	/* Step 1: swath widths, block-aligned upper bounds and maximum swath heights */
	dml32_CalculateSwathWidth(ForceSingleDPP,
			NumberOfActiveSurfaces,
			SourcePixelFormat,
			SourceRotation,
			ViewportStationary,
			ViewportWidth,
			ViewportHeight,
			ViewportXStart,
			ViewportYStart,
			ViewportXStartC,
			ViewportYStartC,
			SurfaceWidthY,
			SurfaceWidthC,
			SurfaceHeightY,
			SurfaceHeightC,
			ODMMode,
			BytePerPixY,
			BytePerPixC,
			Read256BytesBlockHeightY,
			Read256BytesBlockHeightC,
			Read256BytesBlockWidthY,
			Read256BytesBlockWidthC,
			BlendingAndTiming,
			HActive,
			HRatio,
			DPPPerSurface,

			/* Output */
			SwathWidthdoubleDPP,
			SwathWidthdoubleDPPChroma,
			SwathWidth,
			SwathWidthChroma,
			MaximumSwathHeightY,
			MaximumSwathHeightC,
			swath_width_luma_ub,
			swath_width_chroma_ub);

	/* Step 2: size in bytes of one maximum-height swath per plane */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
				RoundedUpMaxSwathSizeBytesY[k]);
		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
				RoundedUpMaxSwathSizeBytesC[k]);
#endif

		/* dm_420_10 swaths are rounded up to a whole number of 256-byte units */
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
		}
	}

	/* Step 3: count the DPPs in use and note whether any surface carries chroma */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
			NoChromaSurfaces = false;
		}
	}

	// By default, just set the reserved space to 2 pixel chunks size
	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;

	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
	// (8 swaths / compression ratio 4 / 1024 bytes per KByte gives the division by 512 below)
	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);

	if (*CompBufReservedSpaceNeedAdjustment == 1) {
		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
#endif

	/* Step 5: unbounded requesting decision, then the DET size of every surface */
	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP,
			NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment,
			DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);

	dml32_CalculateDETBufferSize(DETSizeOverride,
			UseMALLForPStateChange,
			ForceSingleDPP,
			NumberOfActiveSurfaces,
			*UnboundedRequestEnabled,
			nomDETInKByte,
			MaxTotalDETInKByte,
			ConfigReturnBufferSizeInKByte,
			MinCompressedBufferSizeInKByte,
			CompressedBufferSegmentSizeInkByteFinal,
			SourcePixelFormat,
			ReadBandwidthLuma,
			ReadBandwidthChroma,
			RoundedUpMaxSwathSizeBytesY,
			RoundedUpMaxSwathSizeBytesC,
			DPPPerSurface,

			/* Output */
			DETBufferSizeInKByte,    // per hubp pipe
			CompressedBufferSizeInkByte);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
#endif

	/* Step 6: per-surface swath heights, viewport support and DET split */
	*ViewportSizeSupport = true;
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

		/* phantom pipes are sized against a fixed 1024 KByte instead of their own DET */
		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
				DETBufferSizeInKByteForSwathCalculation);
#endif

		/*
		 * Luma + chroma swaths must fit in half of the DET. Try, in order:
		 * both at full height, luma halved (when luma is at least 1.5x
		 * chroma), chroma halved (otherwise), and finally both halved.
		 */
		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
		} else {
			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
		}

		/*
		 * The viewport is unsupported when even the halved swaths do not
		 * fit, or when a swath is wider than the allowed maximum (chroma
		 * is only checked when the surface has a chroma swath).
		 */
		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
			*ViewportSizeSupport = false;
			ViewportSizeSupportPerSurface[k] = false;
		} else {
			ViewportSizeSupportPerSurface[k] = true;
		}

		/* Divide the DET: all to luma, half/half, or 2/3 luma (1 KByte aligned) and the rest chroma */
		if (SwathHeightC[k] == 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
#endif
			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
			DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
#endif
			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
		} else {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
#endif
			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
				k, RoundedUpMaxSwathSizeBytesY[k]);
		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
				k, RoundedUpMaxSwathSizeBytesC[k]);
		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
				ViewportSizeSupportPerSurface[k]);
#endif

	}
} // CalculateSwathAndDETConfiguration

/*
 * dml32_CalculateSwathWidth - per-surface swath widths, their block-aligned
 * upper bounds and the maximum swath heights.
 *
 * For every surface k < NumberOfActiveSurfaces:
 *  - SwathWidthdoubleDPPY[k] is the viewport width, or the viewport height
 *    when IsVertical(SourceRotation[k]).
 *  - SwathWidthY[k] is that value when ForceSingleDPP is set; otherwise it is
 *    limited to HActive / 4 * HRatio or HActive / 2 * HRatio for 4:1 / 2:1
 *    ODM combine (taken from the surface that BlendingAndTiming[k] points
 *    at), halved when DPPPerSurface[k] == 2, or left unchanged.
 *  - The chroma widths are half of the luma widths for dm_420_8, dm_420_10
 *    and dm_420_12, and equal to the luma widths otherwise.
 *  - MaximumSwathHeightY/C[k] is the 256-byte block height, or the block
 *    width for a vertical scan.
 *  - swath_width_luma_ub/chroma_ub[k] is the swath width extended to whole
 *    256-byte blocks along the scan direction and capped at the block-aligned
 *    surface size; the chroma value is 0 when BytePerPixC[k] is 0. With a
 *    stationary viewport on a single DPP the viewport start is used to count
 *    exactly the blocks touched; otherwise one extra block is allowed for.
 */
void dml32_CalculateSwathWidth(
		bool				ForceSingleDPP,
		unsigned int			NumberOfActiveSurfaces,
		enum source_format_class	SourcePixelFormat[],
		enum dm_rotation_angle		SourceRotation[],
		bool				ViewportStationary[],
		unsigned int			ViewportWidth[],
		unsigned int			ViewportHeight[],
		unsigned int			ViewportXStart[],
		unsigned int			ViewportYStart[],
		unsigned int			ViewportXStartC[],
		unsigned int			ViewportYStartC[],
		unsigned int			SurfaceWidthY[],
		unsigned int			SurfaceWidthC[],
		unsigned int			SurfaceHeightY[],
		unsigned int			SurfaceHeightC[],
		enum odm_combine_mode		ODMMode[],
		unsigned int			BytePerPixY[],
		unsigned int			BytePerPixC[],
		unsigned int			Read256BytesBlockHeightY[],
		unsigned int			Read256BytesBlockHeightC[],
		unsigned int			Read256BytesBlockWidthY[],
		unsigned int			Read256BytesBlockWidthC[],
		unsigned int			BlendingAndTiming[],
		unsigned int			HActive[],
		double				HRatio[],
		unsigned int			DPPPerSurface[],

		/* Output */
		double				SwathWidthdoubleDPPY[],
		double				SwathWidthdoubleDPPC[],
		double				SwathWidthY[], // per-pipe
		double				SwathWidthC[], // per-pipe
		unsigned int			MaximumSwathHeightY[],
		unsigned int			MaximumSwathHeightC[],
		unsigned int			swath_width_luma_ub[], // per-pipe
		unsigned int			swath_width_chroma_ub[]) // per-pipe
{
	unsigned int k, j;
	enum odm_combine_mode MainSurfaceODMMode;

	unsigned int surface_width_ub_l;
	unsigned int surface_height_ub_l;
	/* the chroma bounds are only computed for surfaces with chroma; keep them defined for the trace */
	unsigned int surface_width_ub_c = 0;
	unsigned int surface_height_ub_c = 0;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
#endif

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* the swath runs along the viewport width, or its height for a vertical scan */
		if (!IsVertical(SourceRotation[k]))
			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
		else
			SwathWidthdoubleDPPY[k] = ViewportHeight[k];

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
#endif

		/* use the ODM mode of the surface named by BlendingAndTiming[k], if it is an active one */
		MainSurfaceODMMode = ODMMode[k];
		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
			if (BlendingAndTiming[k] == j)
				MainSurfaceODMMode = ODMMode[j];
		}

		if (ForceSingleDPP) {
			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
		} else {
			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
						dml_round(HActive[k] / 4.0 * HRatio[k]));
			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
						dml_round(HActive[k] / 2.0 * HRatio[k]));
			} else if (DPPPerSurface[k] == 2) {
				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
			} else {
				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
		/* both swath widths are doubles and must be printed with %f */
		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%f\n", __func__, k, SwathWidthdoubleDPPY[k]);
		dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
#endif

		/* the 4:2:0 formats have a chroma plane half as wide as luma */
		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
				SourcePixelFormat[k] == dm_420_12) {
			SwathWidthC[k] = SwathWidthY[k] / 2;
			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
		} else {
			SwathWidthC[k] = SwathWidthY[k];
			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
		}

		if (ForceSingleDPP == true) {
			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
		}

		/* surface size rounded up to whole 256-byte blocks */
		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);

		if (!IsVertical(SourceRotation[k])) {
			/* horizontal scan: the swath is one block high and spans block widths */
			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
						dml_floor(ViewportXStart[k] +
								SwathWidthY[k] +
								Read256BytesBlockWidthY[k] - 1,
								Read256BytesBlockWidthY[k]) -
								dml_floor(ViewportXStart[k],
								Read256BytesBlockWidthY[k]));
			} else {
				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
						dml_ceil(SwathWidthY[k] - 1,
								Read256BytesBlockWidthY[k]) +
								Read256BytesBlockWidthY[k]);
			}
			if (BytePerPixC[k] > 0) {
				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
									Read256BytesBlockWidthC[k] - 1,
									Read256BytesBlockWidthC[k]) -
									dml_floor(ViewportXStartC[k],
									Read256BytesBlockWidthC[k]));
				} else {
					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
							dml_ceil(SwathWidthC[k] - 1,
								Read256BytesBlockWidthC[k]) +
								Read256BytesBlockWidthC[k]);
				}
			} else {
				swath_width_chroma_ub[k] = 0;
			}
		} else {
			/* vertical scan: block width and height swap roles */
			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];

			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
						Read256BytesBlockHeightY[k]) -
						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
			} else {
				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
			}
			if (BytePerPixC[k] > 0) {
				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
									Read256BytesBlockHeightC[k] - 1,
									Read256BytesBlockHeightC[k]) -
									dml_floor(ViewportYStartC[k],
											Read256BytesBlockHeightC[k]));
				} else {
					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
							Read256BytesBlockHeightC[k]);
				}
			} else {
				swath_width_chroma_ub[k] = 0;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
#endif

	}
} // CalculateSwathWidth

/*
 * dml32_UnboundedRequest - decide whether unbounded requesting may be used.
 *
 * Unbounded requesting is granted only when every one of these holds:
 *   - the policy is not dm_unbounded_requesting_disable,
 *   - exactly one DPP is active and the surface has no chroma plane,
 *   - the policy is not "eDP only", or the output really is eDP,
 *   - the surface is not linearly tiled (dm_sw_linear),
 *   - the compressed buffer reserved space does not need adjustment, or the
 *     caller did not ask to disable unbounded requesting in that situation.
 *
 * Return: true if unbounded requesting is allowed, false otherwise.
 */
bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
			unsigned int TotalNumberOfActiveDPP,
			bool NoChroma,
			enum output_encoder_class Output,
			enum dm_swizzle_mode SurfaceTiling,
			bool CompBufReservedSpaceNeedAdjustment,
			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
{
	bool allowed = true;

	/* Each test below is an independent veto; the first match wins. */
	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_disable)
		allowed = false;
	else if (TotalNumberOfActiveDPP != 1 || !NoChroma)
		allowed = false;
	else if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
		allowed = false;
	else if (SurfaceTiling == dm_sw_linear)
		allowed = false;
	else if (CompBufReservedSpaceNeedAdjustment &&
			DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
		allowed = false;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
	dml_print("DML::%s: ret_val = %d\n",  __func__, allowed);
#endif

	return allowed;
}

/*
 * dml32_CalculateDETBufferSize - size the DET buffer of every active surface
 * and derive the compressed buffer size.
 *
 * Inputs:
 *   DETSizeOverride[]           per-surface forced DET size; 0 means "no override"
 *   UseMALLForPStateChange[]    phantom-pipe surfaces get no DET and are
 *                               excluded from the bandwidth split
 *   ForceSingleDPP              when true, every surface is treated as using
 *                               one DPP regardless of DPPPerSurface[]
 *   NumberOfActiveSurfaces      number of valid entries in the arrays
 *   UnboundedRequestEnabled     selects the single-surface path (index 0 only)
 *   nomDETInKByte               nominal DET size, also used as the per-pipe cap
 *   MaxTotalDETInKByte          size of the DET pool shared by all surfaces
 *   ConfigReturnBufferSizeInKByte, MinCompressedBufferSizeInKByte,
 *   CompressedBufferSegmentSizeInkByteFinal
 *                               used to compute *CompressedBufferSizeInkByte
 *   SourcePixelFormat[]         4:2:0 formats lower the "two swaths" search limit
 *   ReadBandwidthLuma/Chroma[]  per-surface bandwidth, used as the split weight
 *   RoundedUpMaxSwathSizeBytesY/C[]  swath sizes in bytes
 *   DPPPerSurface[]             DPP count of each surface
 *
 * Outputs:
 *   DETBufferSizeInKByte[]      DET size chosen for each surface
 *   *CompressedBufferSizeInkByte
 *
 * Unbounded path: surface 0 gets its override, or the larger of the nominal
 * size and two swaths rounded up to a multiple of 64; the compressed buffer is
 * whatever is left of the config return buffer.
 *
 * Bounded path:
 *   1. Every surface starts from a minimum that holds two full swaths (searched
 *      in steps of 64 starting at 128), falling back to a single-swath based
 *      size when no candidate within the limit fits.  The pool is reduced by
 *      what each surface takes.
 *   2. Surfaces whose share of the total DET already matches or exceeds their
 *      share of the total bandwidth are marked as done.
 *   3. The remaining surfaces are visited from lowest to highest bandwidth and
 *      each receives a bandwidth-proportional piece of what is left in the
 *      pool, limited by the nominal cap for that surface.
 */
void dml32_CalculateDETBufferSize(
		unsigned int DETSizeOverride[],
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		bool ForceSingleDPP,
		unsigned int NumberOfActiveSurfaces,
		bool UnboundedRequestEnabled,
		unsigned int nomDETInKByte,
		unsigned int MaxTotalDETInKByte,
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int MinCompressedBufferSizeInKByte,
		unsigned int CompressedBufferSegmentSizeInkByteFinal,
		enum source_format_class SourcePixelFormat[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		unsigned int RoundedUpMaxSwathSizeBytesY[],
		unsigned int RoundedUpMaxSwathSizeBytesC[],
		unsigned int DPPPerSurface[],
		/* Output */
		unsigned int DETBufferSizeInKByte[],
		unsigned int *CompressedBufferSizeInkByte)
{
	unsigned int DETBufferSizePoolInKByte;
	unsigned int NextDETBufferPieceInKByte;
	unsigned int NomDETForNextSurfaceInKByte;
	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
	bool NextPotentialSurfaceToAssignDETPieceFound;
	unsigned int NextSurfaceToAssignDETPiece;
	double TotalBandwidth;
	double BandwidthOfSurfacesNotAssignedDETPiece;
	unsigned int max_minDET;
	unsigned int minDET;
	unsigned int minDET_pipe;
	unsigned int j, k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
			CompressedBufferSegmentSizeInkByteFinal);
#endif

	// Note: Will use default det size if that fits 2 swaths
	if (UnboundedRequestEnabled) {
		/* Only surface 0 is configured on this path. */
		if (DETSizeOverride[0] > 0) {
			DETBufferSizeInKByte[0] = DETSizeOverride[0];
		} else {
			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
					((double) RoundedUpMaxSwathSizeBytesY[0] +
							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
		}
		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
	} else {
		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			DETBufferSizeInKByte[k] = nomDETInKByte;
			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
					SourcePixelFormat[k] == dm_420_12) {
				max_minDET = nomDETInKByte - 64;
			} else {
				max_minDET = nomDETInKByte;
			}
			minDET = 128;
			minDET_pipe = 0;

			// add DET resource until can hold 2 full swaths
			while (minDET <= max_minDET && minDET_pipe == 0) {
				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
					minDET_pipe = minDET;
				minDET = minDET + 64;
			}

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
					RoundedUpMaxSwathSizeBytesY[k]);
			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
					RoundedUpMaxSwathSizeBytesC[k]);
#endif

			/*
			 * No candidate up to max_minDET holds two swaths: fall back to
			 * a size derived from a single swath pair, at least 128.
			 */
			if (minDET_pipe == 0) {
				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
						__func__, k, minDET_pipe);
#endif
			}

			/*
			 * Phantom pipes take nothing from the pool.  An override is
			 * honoured unconditionally.  Otherwise the minimum is used only
			 * if the pool can still afford it for all DPPs of the surface;
			 * if not, the surface keeps nomDETInKByte and the pool is left
			 * untouched.
			 */
			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
				DETBufferSizeInKByte[k] = 0;
			} else if (DETSizeOverride[k] > 0) {
				DETBufferSizeInKByte[k] = DETSizeOverride[k];
				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
				DETBufferSizeInKByte[k] = minDET_pipe;
				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
			}

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
#endif
		}

		/* Total bandwidth of all non-phantom surfaces: the split weight. */
		TotalBandwidth = 0;
		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
		for (k = 0; k < NumberOfActiveSurfaces; ++k)
			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
#endif
		/*
		 * Mark the surfaces that need no further DET: phantom pipes,
		 * overridden surfaces, and surfaces whose DET share is already at
		 * least as large as their bandwidth share.
		 */
		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
		for (k = 0; k < NumberOfActiveSurfaces; ++k) {

			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
				DETPieceAssignedToThisSurfaceAlready[k] = true;
			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
				DETPieceAssignedToThisSurfaceAlready[k] = true;
				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
			} else {
				DETPieceAssignedToThisSurfaceAlready[k] = false;
			}
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
					DETPieceAssignedToThisSurfaceAlready[k]);
			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
					BandwidthOfSurfacesNotAssignedDETPiece);
#endif
		}

		/*
		 * One surface is served per outer iteration: the not-yet-served
		 * surface with the lowest bandwidth.
		 */
		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
			NextPotentialSurfaceToAssignDETPieceFound = false;
			NextSurfaceToAssignDETPiece = 0;

			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
						ReadBandwidthLuma[k]);
				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
						ReadBandwidthChroma[k]);
				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
						NextSurfaceToAssignDETPiece);
#endif
				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
						(!NextPotentialSurfaceToAssignDETPieceFound ||
						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
					NextSurfaceToAssignDETPiece = k;
					NextPotentialSurfaceToAssignDETPieceFound = true;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
#endif
			}

			if (NextPotentialSurfaceToAssignDETPieceFound) {
				// Note: To show the banker's rounding behavior in VBA and also the fact
				// that the DET buffer size varies due to precision issue
				//
				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
				// BandwidthOfSurfacesNotAssignedDETPiece /
				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
				 //BandwidthOfSurfacesNotAssignedDETPiece /
				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
				//
				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);

				/*
				 * Bandwidth-proportional piece of the remaining pool, rounded
				 * to a multiple of (DPP count * 64) and never more than the
				 * pool rounded down to that same granularity.
				 */
				NextDETBufferPieceInKByte = dml_min(
					dml_round((double) DETBufferSizePoolInKByte *
						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
						BandwidthOfSurfacesNotAssignedDETPiece /
						((ForceSingleDPP ? 1 :
								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
						(ForceSingleDPP ? 1 :
								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
						dml_floor((double) DETBufferSizePoolInKByte,
						(ForceSingleDPP ? 1 :
								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));

				// Above calculation can assign the entire DET buffer allocation to a single pipe.
				// We should limit the per-pipe DET size to the nominal / max per pipe.
				//
				// The limit must be computed from the DPP count of the surface being
				// served.  The search loop above has already finished, so 'k' equals
				// NumberOfActiveSurfaces here and must not be used as an index.
				// NOTE(review): the cap is a whole-surface figure (nominal * DPP count)
				// but a per-pipe size is subtracted from it below; kept as found,
				// confirm the intended units.
				NomDETForNextSurfaceInKByte = nomDETInKByte *
						(ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
				if (NextDETBufferPieceInKByte > NomDETForNextSurfaceInKByte) {
					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
							NomDETForNextSurfaceInKByte) {
						NextDETBufferPieceInKByte = NomDETForNextSurfaceInKByte -
								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
					} else {
						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
						// already has the max per-pipe value
						NextDETBufferPieceInKByte = 0;
					}
				}

#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
					DETBufferSizePoolInKByte);
				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
					NextSurfaceToAssignDETPiece);
				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
					NextDETBufferPieceInKByte);
				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
					__func__, j, NextSurfaceToAssignDETPiece,
					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
#endif

				/* The piece covers all DPPs of the surface; store the per-DPP share. */
				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
						+ NextDETBufferPieceInKByte
						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
#ifdef __DML_VBA_DEBUG__
				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
#endif

				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
			}
		}
		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
	}
	/* Rescale by the configured segment size relative to 64. */
	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
	}
#endif
} // CalculateDETBufferSize

void dml32_CalculateODMMode(
		unsigned int MaximumPixelsPerLinePerDSCUnit,
		unsigned int HActive,
		enum output_format_class OutFormat,
		enum output_encoder_class Output,
		enum odm_combine_policy ODMUse,
		double StateDispclk,
		double MaxDispclk,
		bool DSCEnable,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int MaxNumDPP,
		double PixelClock,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKRampingMargin,
		double DISPCLKDPPCLKVCOSpeed,
		unsigned int NumberOfDSCSlices,

		/* Output */
		bool *TotalAvailablePipesSupport,
		unsigned int *NumberOfDPP,
		enum odm_combine_mode *ODMMode,
		double *RequiredDISPCLKPerSurface)
{

	double SurfaceRequiredDISPCLKWithoutODMCombine;
	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;

	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
			MaxDispclk);
	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
			MaxDispclk);
	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
			MaxDispclk);
	*TotalAvailablePipesSupport = true;
	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable

	if (ODMUse == dm_odm_combine_policy_none)
		*ODMMode = dm_odm_combine_mode_disabled;

	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
	*NumberOfDPP = 0;

	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
	// (ODMUse == "" || ODMUse == "CombineAsNeeded")

	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
					|| NumberOfDSCSlices > 8)))) {
		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
			*ODMMode = dm_odm_combine_mode_4to1;
			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
			*NumberOfDPP = 4;
		} else {
			*TotalAvailablePipesSupport = false;
		}
	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
			*ODMMode = dm_odm_combine_mode_2to1;
			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
			*NumberOfDPP = 2;
		} else {
			*TotalAvailablePipesSupport = false;
		}
	} else {
		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
			*NumberOfDPP = 1;
		else
			*TotalAvailablePipesSupport = false;
	}
	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
			ODMUse != dm_odm_combine_policy_4to1) {
		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
			*ODMMode = dm_odm_combine_mode_disabled;
			*NumberOfDPP = 0;
			*TotalAvailablePipesSupport = false;
		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
				*ODMMode == dm_odm_combine_mode_4to1) {
			*ODMMode = dm_odm_combine_mode_4to1;
			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
			*NumberOfDPP = 4;
		} else {
			*ODMMode = dm_odm_combine_mode_2to1;
			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
			*NumberOfDPP = 2;
		}
	}
	if (Output == dm_hdmi && OutFormat == dm_420 &&
			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
		*ODMMode = dm_odm_combine_mode_disabled;
		*NumberOfDPP = 0;
		*TotalAvailablePipesSupport = false;
	}
}

/*
 * dml32_CalculateRequiredDispclk - DISPCLK needed for a given ODM mode.
 *
 * The pixel clock is divided by 4 for 4:1 combine, by 2 for 2:1 combine and
 * used as is otherwise.  Two candidates are derived from it, both rounded up
 * to DFS granularity: one with down-spreading only, one with down-spreading
 * and the ramping margin.  MaxDispclk is rounded down to DFS granularity and
 * serves as the ceiling.
 *
 * Return:
 *   - the candidate without ramping margin if even that exceeds the ceiling,
 *   - the ceiling if only the candidate with ramping margin exceeds it,
 *   - the candidate with ramping margin otherwise.
 */
double dml32_CalculateRequiredDispclk(
		enum odm_combine_mode ODMMode,
		double PixelClock,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKRampingMargin,
		double DISPCLKDPPCLKVCOSpeed,
		double MaxDispclk)
{
	double odm_pixel_clock;
	double clk_with_margin;
	double clk_without_margin;
	double clk_ceiling;

	switch (ODMMode) {
	case dm_odm_combine_mode_4to1:
		odm_pixel_clock = PixelClock / 4;
		break;
	case dm_odm_combine_mode_2to1:
		odm_pixel_clock = PixelClock / 2;
		break;
	default:
		odm_pixel_clock = PixelClock;
		break;
	}

	clk_with_margin = dml32_RoundToDFSGranularity(
			odm_pixel_clock * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);

	clk_without_margin = dml32_RoundToDFSGranularity(
			odm_pixel_clock * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1,
			DISPCLKDPPCLKVCOSpeed);

	clk_ceiling = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);

	if (clk_without_margin > clk_ceiling)
		return clk_without_margin;

	if (clk_with_margin > clk_ceiling)
		return clk_ceiling;

	return clk_with_margin;
}

/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (4 * VCOSpeed) / N with N a whole number.
 *
 * @Clock:    clock to round; values <= 0 yield 0
 * @round_up: true selects the nearest such value that is not below Clock
 *            (N = floor of the ratio), false the nearest one that is not
 *            above Clock (N = ceiling of the ratio)
 * @VCOSpeed: VCO speed the divider is applied to
 *
 * Return: the rounded clock.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double vco_x4;
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	vco_x4 = VCOSpeed * 4.0;

	/* A smaller divider gives a higher clock, hence floor when rounding up. */
	divider = round_up ? dml_floor(vco_x4 / Clock, 1.0) : dml_ceil(vco_x4 / Clock, 1.0);

	return vco_x4 / divider;
}

/*
 * dml32_CalculateOutputLink - determine output bpp, DSC/FEC requirement and
 * the link type/rate for one output.
 *
 * All outputs are first reset (*RequiresDSC = false, *RequiresFEC = false,
 * *OutBpp = 0, type/rate = unknown).  Nothing else happens unless
 * IsMainSurfaceUsingTheIndicatedTiming is set and Output is HDMI, DP, DP2.0
 * or eDP.
 *
 * HDMI: a single dml32_TruncToValidBPP() call without DSC; the output type is
 * set to HDMI and the rate stays unknown.  *RequiredSlots is not written on
 * this path (a local dummy is passed instead).
 *
 * DP / DP2.0 / eDP: link rates are tried from lowest to highest.  A rate is
 * tried when OutputLinkDPRate is "na" or names that rate, the PHY clock
 * threshold for it is met and, for every rate but the lowest, no earlier rate
 * produced a non-zero *OutBpp.  Each attempted rate overwrites *OutputType and
 * *OutputRate, so the reported rate is the last one attempted.
 *
 * Within a rate, if the first attempt yields 0 bpp, DSCEnable is set, no bpp
 * is forced and (except for the highest rate) the PHY clock is below the next
 * rate's threshold, the computation is repeated with DSC marked as required.
 *
 * Note: *RequiresFEC is declared as double * but only ever receives
 * true/false.
 */
void dml32_CalculateOutputLink(
		double PHYCLKPerState,
		double PHYCLKD18PerState,
		double PHYCLKD32PerState,
		double Downspreading,
		bool IsMainSurfaceUsingTheIndicatedTiming,
		enum output_encoder_class Output,
		enum output_format_class OutputFormat,
		unsigned int HTotal,
		unsigned int HActive,
		double PixelClockBackEnd,
		double ForcedOutputLinkBPP,
		unsigned int DSCInputBitPerComponent,
		unsigned int NumberOfDSCSlices,
		double AudioSampleRate,
		unsigned int AudioSampleLayout,
		enum odm_combine_mode ODMModeNoDSC,
		enum odm_combine_mode ODMModeDSC,
		bool DSCEnable,
		unsigned int OutputLinkDPLanes,
		enum dm_output_link_dp_rate OutputLinkDPRate,

		/* Output */
		bool *RequiresDSC,
		double *RequiresFEC,
		double  *OutBpp,
		enum dm_output_type *OutputType,
		enum dm_output_rate *OutputRate,
		unsigned int *RequiredSlots)
{
	/* Only assigned and read inside the DP/DP2.0/eDP branch. */
	bool LinkDSCEnable;
	/* Sink for the RequiredSlots output of the HDMI computation. */
	unsigned int dummy;
	*RequiresDSC = false;
	*RequiresFEC = false;
	*OutBpp = 0;
	*OutputType = dm_output_type_unknown;
	*OutputRate = dm_output_rate_unknown;

	if (IsMainSurfaceUsingTheIndicatedTiming) {
		if (Output == dm_hdmi) {
			*RequiresDSC = false;
			*RequiresFEC = false;
			/* PHY clock is clamped to 600 before being scaled by 10; 3 lanes, no DSC. */
			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
					ODMModeNoDSC, ODMModeDSC, &dummy);
			//OutputTypeAndRate = "HDMI";
			*OutputType = dm_output_type_hdmi;

		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
			/*
			 * Initial DSC/FEC state: with DSC, FEC is required on DP and
			 * DP2.0 but not eDP; without DSC, only DP2.0 requires FEC.
			 */
			if (DSCEnable == true) {
				*RequiresDSC = true;
				LinkDSCEnable = true;
				if (Output == dm_dp || Output == dm_dp2p0)
					*RequiresFEC = true;
				else
					*RequiresFEC = false;
			} else {
				*RequiresDSC = false;
				LinkDSCEnable = false;
				if (Output == dm_dp2p0)
					*RequiresFEC = true;
				else
					*RequiresFEC = false;
			}
			if (Output == dm_dp2p0) {
				/* DP2.0 cascade: UHBR10 -> UHBR13.5 -> UHBR20, gated on PHYCLKD32PerState. */
				*OutBpp = 0;
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
						PHYCLKD32PerState >= 10000.0 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Retry with DSC only if the next rate up is not reachable. */
					if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR10";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr10;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					/* 20000 / 32 is an integer division; it is exact (625). */
					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR13p5";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Highest rate: no "next rate" condition on the DSC retry. */
					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR20";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr20;
				}
			} else {
				/* DP / eDP cascade: HBR -> HBR2 -> HBR3, gated on PHYCLKPerState. */
				*OutBpp = 0;
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
						PHYCLKPerState >= 270) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						/* FEC accompanies DSC on DP only, not on eDP. */
						if (Output == dm_dp)
							*RequiresFEC = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
						*OutBpp == 0 && PHYCLKPerState >= 540) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR2";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr2;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
							RequiredSlots);

					/* Highest rate: no "next rate" condition on the DSC retry. */
					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR3";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr3;
				}
			}
		}
	}
}

/*
 * dml32_CalculateDPPCLK - derive the global DPPCLK and the per-surface DPPCLK.
 *
 * Each surface first gets its single-DPP clock divided by its DPP count and
 * scaled up by the down-spreading percentage.  The largest of those values,
 * rounded up to DFS granularity, becomes *GlobalDPPCLK.  Every per-surface
 * clock is then rounded up to a whole number of 1/255 steps of the global
 * clock.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0..NumberOfActiveSurfaces-1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	double spread_scale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double highest = 0;
	double global_clk;
	double clk_step;
	unsigned int i;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		Dppclk[i] = DPPCLKUsingSingleDPP[i] / DPPPerSurface[i] * spread_scale;
		highest = dml_max(highest, Dppclk[i]);
	}

	global_clk = dml32_RoundToDFSGranularity(highest, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = global_clk;

	/* Quantise each surface clock to 1/255 steps of the global clock. */
	clk_step = global_clk / 255;
	for (i = 0; i < NumberOfActiveSurfaces; ++i)
		Dppclk[i] = clk_step * dml_ceil(Dppclk[i] * 255.0 / global_clk, 1.0);
}

/*
 * Select (or validate) the output bits-per-pixel for a link.
 *
 * LinkBitRate, Lanes and PixelClock give the raw link capacity per pixel
 * (MaxLinkBPP); the divisor/multiplier chain applied depends on Output and on
 * whether DSC is enabled. The capacity is then limited to 16 / 32 bpp for the
 * 4:1 / 2:1 ODM combine modes, or doubled for the 1:2 ODM split mode, using
 * ODMModeDSC when DSCEnable is set and ODMModeNoDSC otherwise.
 *
 * *RequiredSlots is always written as ceil(DesiredBPP / MaxLinkBPP * 64).
 *
 * Return value:
 *  - DesiredBPP == 0 (automatic):
 *      with DSC:    BPP_INVALID if the link cannot carry MinDSCBPP, otherwise
 *                   the link capacity truncated to 1/16 bpp and capped at
 *                   MaxDSCBPP;
 *      without DSC: the highest of the three per-format tiers
 *                   (NonDSCBPP2/1/0) the link can carry, or BPP_INVALID.
 *  - DesiredBPP != 0 (forced): DesiredBPP if it is acceptable for the
 *    format / DSC setting, BPP_INVALID otherwise. The link capacity is not
 *    consulted on this path.
 *
 * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are accepted for
 * interface compatibility but are not referenced by this function.
 */
double dml32_TruncToValidBPP(
		double LinkBitRate,
		unsigned int Lanes,
		unsigned int HTotal,
		unsigned int HActive,
		double PixelClock,
		double DesiredBPP,
		bool DSCEnable,
		enum output_encoder_class Output,
		enum output_format_class Format,
		unsigned int DSCInputBitPerComponent,
		unsigned int DSCSlices,
		unsigned int AudioRate,
		unsigned int AudioLayout,
		enum odm_combine_mode ODMModeNoDSC,
		enum odm_combine_mode ODMModeDSC,
		/* Output */
		unsigned int *RequiredSlots)
{
	double    MaxLinkBPP;
	unsigned int   MinDSCBPP;
	double    MaxDSCBPP;
	unsigned int   NonDSCBPP0;
	unsigned int   NonDSCBPP1;
	unsigned int   NonDSCBPP2;
	enum odm_combine_mode ODMMode;

	/* Per-format uncompressed tiers and DSC bpp range. */
	if (Format == dm_420) {
		NonDSCBPP0 = 12;
		NonDSCBPP1 = 15;
		NonDSCBPP2 = 18;
		MinDSCBPP = 6;
		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
	} else if (Format == dm_444) {
		NonDSCBPP0 = 24;
		NonDSCBPP1 = 30;
		NonDSCBPP2 = 36;
		MinDSCBPP = 8;
		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
	} else {
		if (Output == dm_hdmi) {
			NonDSCBPP0 = 24;
			NonDSCBPP1 = 24;
			NonDSCBPP2 = 24;
		} else {
			NonDSCBPP0 = 16;
			NonDSCBPP1 = 20;
			NonDSCBPP2 = 24;
		}
		if (Format == dm_n422) {
			MinDSCBPP = 7;
			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
		} else {
			MinDSCBPP = 8;
			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
		}
	}

	/* Link capacity in bits per pixel for the selected encoder. */
	if (Output == dm_dp2p0) {
		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
	} else if (DSCEnable && Output == dm_dp) {
		/* DP with DSC: an additional 2.4% is subtracted from the capacity. */
		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
	} else {
		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
	}

	/* The ODM mode that applies depends on whether DSC is in use. */
	ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
	if (ODMMode == dm_odm_combine_mode_4to1)
		MaxLinkBPP = dml_min(MaxLinkBPP, 16);
	else if (ODMMode == dm_odm_combine_mode_2to1)
		MaxLinkBPP = dml_min(MaxLinkBPP, 32);
	else if (ODMMode == dm_odm_split_mode_1to2)
		MaxLinkBPP = 2 * MaxLinkBPP;

	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);

	if (DesiredBPP == 0) {
		if (DSCEnable) {
			if (MaxLinkBPP < MinDSCBPP)
				return BPP_INVALID;
			else if (MaxLinkBPP >= MaxDSCBPP)
				return MaxDSCBPP;
			else
				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
		} else {
			if (MaxLinkBPP >= NonDSCBPP2)
				return NonDSCBPP2;
			else if (MaxLinkBPP >= NonDSCBPP1)
				return NonDSCBPP1;
			else if (MaxLinkBPP >= NonDSCBPP0)
				/*
				 * Return the tier that was actually tested. A fixed
				 * 16 bpp here would exceed the verified capacity for
				 * 4:2:0 (tier 12) and is not a tier at all for 4:4:4
				 * or HDMI 4:2:2 (tier 24).
				 */
				return NonDSCBPP0;
			else
				return BPP_INVALID;
		}
	} else {
		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
				DesiredBPP <= NonDSCBPP0)) ||
				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
			return BPP_INVALID;
		else
			return DesiredBPP;
	}
} // TruncToValidBPP

/*
 * Return the DTBCLK needed for the given timing and output bpp.
 *
 * Without DSC the result is PixelClock / 4 * OutputBpp / 24 with a floor of
 * 25. With DSC it is the largest of the pixel word rate, the average tribyte
 * rate over the whole line and the tribyte rate over the active region (each
 * divided by 4), again floored at 25, and finally scaled by 1.002.
 */
double dml32_RequiredDTBCLK(
		bool              DSCEnable,
		double               PixelClock,
		enum output_format_class  OutputFormat,
		double               OutputBpp,
		unsigned int              DSCSlices,
		unsigned int                 HTotal,
		unsigned int                 HActive,
		unsigned int              AudioRate,
		unsigned int              AudioLayout)
{
	double word_rate;
	double slice_width;
	double slice_bytes;
	double hc_active;
	double audio_scale;
	double hc_blank;
	double avg_tribyte_rate;
	double active_tribyte_rate;

	if (DSCEnable != true)
		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);

	/* Every format other than dm_444 runs at half the pixel clock. */
	if (OutputFormat == dm_444)
		word_rate = PixelClock / 1;
	else
		word_rate = PixelClock / 2;

	/*
	 * NOTE(review): HActive / DSCSlices is an unsigned integer division, so
	 * the quotient is already truncated before dml_ceil() sees it — confirm
	 * whether a true ceiling was intended.
	 */
	slice_width = dml_ceil(HActive / DSCSlices, 1);
	slice_bytes = dml_ceil(OutputBpp * slice_width / 8.0, 1);
	hc_active = dml_ceil(DSCSlices * slice_bytes / 3.0, 1);

	audio_scale = (AudioLayout == 1 ? 1 : 0.25);
	hc_blank = 64 + 32 *
			dml_ceil(AudioRate * audio_scale * HTotal / (PixelClock * 1000), 1);

	avg_tribyte_rate = word_rate * (hc_active + hc_blank) / HTotal;
	active_tribyte_rate = word_rate * hc_active / HActive;

	return dml_max4(word_rate / 4.0, avg_tribyte_rate / 4.0, active_tribyte_rate / 4.0, 25.0) * 1.002;
}

/*
 * Return the DSC delay requirement for one output.
 *
 * When DSC is disabled or OutputBpp is 0 the base delay is 0. Otherwise the
 * base delay is dml32_dscceComputeDelay() + dml32_dscComputeDelay(); for the
 * 4:1 / 2:1 ODM combine modes the slice count passed to the first helper is
 * divided by 4 / 2 and the sum is multiplied by the same factor. The delay is
 * then extended by (HTotal - HActive) for every started HActive worth of
 * delay, rescaled by PixelClock / PixelClockBackEnd, and finally multiplied
 * by dsc_delay_factor_wa and rounded up.
 */
unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
		enum odm_combine_mode ODMMode,
		unsigned int DSCInputBitPerComponent,
		double OutputBpp,
		unsigned int HActive,
		unsigned int HTotal,
		unsigned int NumberOfDSCSlices,
		enum output_format_class  OutputFormat,
		enum output_encoder_class Output,
		double PixelClock,
		double PixelClockBackEnd,
		double dsc_delay_factor_wa)
{
	unsigned int delay = 0;

	if (DSCEnabled == true && OutputBpp != 0) {
		int odm_factor;

		switch (ODMMode) {
		case dm_odm_combine_mode_4to1:
			odm_factor = 4;
			break;
		case dm_odm_combine_mode_2to1:
			odm_factor = 2;
			break;
		default:
			odm_factor = 1;
			break;
		}

		/*
		 * NOTE(review): HActive / NumberOfDSCSlices is an unsigned integer
		 * division, so dml_ceil() receives an already truncated value —
		 * confirm whether a true ceiling was intended.
		 */
		delay = odm_factor * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
				dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / odm_factor,
				OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));

		/* Add one blanking interval per started line of delay. */
		delay = delay + (HTotal - HActive) *
				dml_ceil((double)delay / HActive, 1);

		/* Rescale by the ratio of PixelClock to PixelClockBackEnd. */
		delay = delay * PixelClock / PixelClockBackEnd;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, delay);
#endif

	return dml_ceil(delay * dsc_delay_factor_wa, 1);
}

/*
 * Compute, for every active surface, the size it occupies in MALL and report
 * whether the total exceeds the MALL allocation.
 *
 * Per surface k, SurfaceSizeInMALL[k] is the sum of:
 *  - the luma plane: width x height (each aligned to the read block size)
 *    times BytesPerPixelY[k];
 *  - the chroma plane, computed the same way, only when ReadBlockWidthC[k] > 0;
 *  - when DCCEnable[k] is set: a luma term aligned to 8x the 256-byte block
 *    dimensions and divided by 256, plus a fixed 64 * 1024, and an analogous
 *    chroma term (without the fixed part) when Read256BytesBlockWidthC[k] > 0.
 *
 * With ViewportStationary[k] set, the extents are derived from the block
 * aligned span covering [ViewportStart, ViewportStart + ViewportSize) and
 * capped by the block-aligned surface size (DCCMetaPitch for the DCC width).
 * Otherwise the extents are min(surface size, viewport size + block - 1)
 * rounded up to the block size.
 *
 * *ExceededMALLSize is set when either the sum over phantom-pipe surfaces or
 * the sum over static-screen-enabled surfaces is larger than
 * MALLAllocatedForDCN * 1024 * 1024.
 */
void dml32_CalculateSurfaceSizeInMall(
		unsigned int NumberOfActiveSurfaces,
		unsigned int MALLAllocatedForDCN,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
		bool DCCEnable[],
		bool ViewportStationary[],
		unsigned int ViewportXStartY[],
		unsigned int ViewportYStartY[],
		unsigned int ViewportXStartC[],
		unsigned int ViewportYStartC[],
		unsigned int ViewportWidthY[],
		unsigned int ViewportHeightY[],
		unsigned int BytesPerPixelY[],
		unsigned int ViewportWidthC[],
		unsigned int ViewportHeightC[],
		unsigned int BytesPerPixelC[],
		unsigned int SurfaceWidthY[],
		unsigned int SurfaceWidthC[],
		unsigned int SurfaceHeightY[],
		unsigned int SurfaceHeightC[],
		unsigned int Read256BytesBlockWidthY[],
		unsigned int Read256BytesBlockWidthC[],
		unsigned int Read256BytesBlockHeightY[],
		unsigned int Read256BytesBlockHeightC[],
		unsigned int ReadBlockWidthY[],
		unsigned int ReadBlockWidthC[],
		unsigned int ReadBlockHeightY[],
		unsigned int ReadBlockHeightC[],
		unsigned int DCCMetaPitchY[],
		unsigned int DCCMetaPitchC[],

		/* Output */
		unsigned int    SurfaceSizeInMALL[],
		bool *ExceededMALLSize)
{
	unsigned int k;
	unsigned int TotalSurfaceSizeInMALLForSS = 0;
	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
	/* The allocation is scaled by 1024 * 1024 to compare against byte totals. */
	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (ViewportStationary[k]) {
			/* Luma plane: block-aligned viewport span, capped by the aligned surface. */
			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];

			/* Chroma plane, skipped when its read block width is 0. */
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
							BytesPerPixelC[k];
			}
			if (DCCEnable[k] == true) {
				/* DCC luma term: 8x 256-byte-block alignment, /256, plus a fixed 64 * 1024. */
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
							Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
				/* DCC chroma term (no fixed addend). */
				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
							dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
								Read256BytesBlockWidthC[k]),
								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
								* Read256BytesBlockWidthC[k] - 1, 8 *
								Read256BytesBlockWidthC[k]) -
								dml_floor(ViewportXStartC[k], 8 *
								Read256BytesBlockWidthC[k])) *
								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
								Read256BytesBlockHeightC[k]),
								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
								8 * Read256BytesBlockHeightC[k] - 1, 8 *
								Read256BytesBlockHeightC[k]) -
								dml_floor(ViewportYStartC[k], 8 *
								Read256BytesBlockHeightC[k])) *
								BytesPerPixelC[k] / 256;
				}
			}
		} else {
			/*
			 * Viewport start is not used on this path: each extent is
			 * min(surface, viewport + block - 1) rounded up to the block size.
			 */
			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
							BytesPerPixelY[k];
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
								BytesPerPixelC[k];
			}
			if (DCCEnable[k] == true) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
								Read256BytesBlockWidthY[k] - 1), 8 *
								Read256BytesBlockWidthY[k]) *
						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
								Read256BytesBlockHeightY[k] - 1), 8 *
								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);

				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
							dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
									Read256BytesBlockWidthC[k] - 1), 8 *
									Read256BytesBlockWidthC[k]) *
							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
									Read256BytesBlockHeightC[k] - 1), 8 *
									Read256BytesBlockHeightC[k]) *
									BytesPerPixelC[k] / 256;
				}
			}
		}
	}

	/* Accumulate the two usage classes; a surface contributes to at most one. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* SS and Subvp counted separate as they are never used at the same time */
		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
			TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
			TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
	}
	*ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
							(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
} // CalculateSurfaceSizeInMall

/*
 * Per-surface page-table, DCC meta-row and prefetch-line calculation.
 *
 * For every active surface this function:
 *  1. picks vm_group_bytes / dpte_group_bytes from the HostVM / GPUVM enables,
 *     the minimum page size and the rotation;
 *  2. splits the PTE request buffer between luma and chroma and, for the
 *     4:2:0 and dm_rgbe_alpha formats, runs dml32_CalculateVMAndRowBytes()
 *     and dml32_CalculatePrefetchSourceLines() for the chroma plane;
 *  3. runs the same two helpers for the luma plane;
 *  4. records whether the regular and the "one row per frame" PTE rows fit in
 *     the PTE buffer.
 * It then calls dml32_CalculateMALLUseForStaticScreen(), derives
 * PTE_BUFFER_MODE / BIGK_FRAGMENT_SIZE, selects the one-row-per-frame values
 * where applicable, checks the DCC meta buffer size and finally calls
 * dml32_CalculateRowBandwidth() for each surface.
 *
 * All array parameters are indexed by surface, 0 .. NumberOfActiveSurfaces-1.
 * The local scratch arrays are sized DC__NUM_DPP__MAX, so
 * NumberOfActiveSurfaces must not exceed that value.
 */
void dml32_CalculateVMRowAndSwath(
		unsigned int NumberOfActiveSurfaces,
		DmlPipe myPipe[],
		unsigned int SurfaceSizeInMALL[],
		unsigned int PTEBufferSizeInRequestsLuma,
		unsigned int PTEBufferSizeInRequestsChroma,
		unsigned int DCCMetaBufferSizeBytes,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		unsigned int MALLAllocatedForDCN,
		double SwathWidthY[],
		double SwathWidthC[],
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int GPUVMMinPageSizeKBytes[],
		unsigned int HostVMMinPageSize,

		/* Output */
		bool PTEBufferSizeNotExceeded[],
		bool DCCMetaBufferSizeNotExceeded[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int dpte_row_height_luma[],
		unsigned int dpte_row_height_chroma[],
		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int vm_group_bytes[],
		unsigned int dpte_group_bytes[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PTERequestSizeY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int PTERequestSizeC[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],
		double PrefetchSourceLinesY[],
		double PrefetchSourceLinesC[],
		double VInitPreFillY[],
		double VInitPreFillC[],
		unsigned int MaxNumSwathY[],
		unsigned int MaxNumSwathC[],
		double meta_row_bw[],
		double dpte_row_bw[],
		double PixelPTEBytesPerRow[],
		double PDEAndMetaPTEBytesFrame[],
		double MetaRowByte[],
		bool use_one_row_for_frame[],
		bool use_one_row_for_frame_flip[],
		bool UsesMALLForStaticScreen[],
		bool PTE_BUFFER_MODE[],
		unsigned int BIGK_FRAGMENT_SIZE[])
{
	unsigned int k;
	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
	unsigned int PDEAndMetaPTEBytesFrameY;
	unsigned int PDEAndMetaPTEBytesFrameC;
	unsigned int MetaRowByteY[DC__NUM_DPP__MAX] = {0};
	unsigned int MetaRowByteC[DC__NUM_DPP__MAX] = {0};
	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Group sizes: HostVM takes precedence over GPUVM; both off gives 0. */
		if (HostVMEnable == true) {
			vm_group_bytes[k] = 512;
			dpte_group_bytes[k] = 512;
		} else if (GPUVMEnable == true) {
			vm_group_bytes[k] = 2048;
			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
				dpte_group_bytes[k] = 512;
			else
				dpte_group_bytes[k] = 2048;
		} else {
			vm_group_bytes[k] = 0;
			dpte_group_bytes[k] = 0;
		}

		/* Only these formats get a separate chroma-plane pass. */
		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
				myPipe[k].SourcePixelFormat == dm_420_12 ||
				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
			/*
			 * 10/12-bit 4:2:0 without vertical rotation: luma and chroma each
			 * get half of the combined PTE buffer; otherwise each plane keeps
			 * its own buffer size.
			 */
			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
					!IsVertical(myPipe[k].SourceRotation)) {
				PTEBufferSizeInRequestsForLuma[k] =
						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
			} else {
				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
			}

			/* Chroma plane: VM / row byte quantities. */
			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
					myPipe[k].ViewportStationary,
					myPipe[k].DCCEnable,
					myPipe[k].DPPPerSurface,
					myPipe[k].BlockHeight256BytesC,
					myPipe[k].BlockWidth256BytesC,
					myPipe[k].SourcePixelFormat,
					myPipe[k].SurfaceTiling,
					myPipe[k].BytePerPixelC,
					myPipe[k].SourceRotation,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,
					GPUVMEnable,
					HostVMEnable,
					HostVMMaxNonCachedPageTableLevels,
					GPUVMMaxPageTableLevels,
					GPUVMMinPageSizeKBytes[k],
					HostVMMinPageSize,
					PTEBufferSizeInRequestsForChroma[k],
					myPipe[k].PitchC,
					myPipe[k].DCCMetaPitchC,
					myPipe[k].BlockWidthC,
					myPipe[k].BlockHeightC,

					/* Output */
					&MetaRowByteC[k],
					&PixelPTEBytesPerRowC[k],
					&dpte_row_width_chroma_ub[k],
					&dpte_row_height_chroma[k],
					&dpte_row_height_linear_chroma[k],
					&PixelPTEBytesPerRowC_one_row_per_frame[k],
					&dpte_row_width_chroma_ub_one_row_per_frame[k],
					&dpte_row_height_chroma_one_row_per_frame[k],
					&meta_req_width_chroma[k],
					&meta_req_height_chroma[k],
					&meta_row_width_chroma[k],
					&meta_row_height_chroma[k],
					&PixelPTEReqWidthC[k],
					&PixelPTEReqHeightC[k],
					&PTERequestSizeC[k],
					&dpde0_bytes_per_frame_ub_c[k],
					&meta_pte_bytes_per_frame_ub_c[k]);

			/* Chroma plane: prefetch source lines. */
			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
					myPipe[k].VRatioChroma,
					myPipe[k].VTapsChroma,
					myPipe[k].InterlaceEnable,
					myPipe[k].ProgressiveToInterlaceUnitInOPP,
					myPipe[k].SwathHeightC,
					myPipe[k].SourceRotation,
					myPipe[k].ViewportStationary,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,

					/* Output */
					&VInitPreFillC[k],
					&MaxNumSwathC[k]);
		} else {
			/*
			 * No chroma pass: luma gets both PTE buffers and the chroma
			 * intermediates are zeroed. The remaining chroma output arrays
			 * (e.g. meta_*_chroma[k], PixelPTEReq*C[k], VInitPreFillC[k]) are
			 * not written in this branch.
			 */
			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
			PTEBufferSizeInRequestsForChroma[k] = 0;
			PixelPTEBytesPerRowC[k] = 0;
			PDEAndMetaPTEBytesFrameC = 0;
			MetaRowByteC[k] = 0;
			MaxNumSwathC[k] = 0;
			PrefetchSourceLinesC[k] = 0;
			dpte_row_height_chroma_one_row_per_frame[k] = 0;
			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
		}

		/* Luma plane: VM / row byte quantities. */
		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
				myPipe[k].ViewportStationary,
				myPipe[k].DCCEnable,
				myPipe[k].DPPPerSurface,
				myPipe[k].BlockHeight256BytesY,
				myPipe[k].BlockWidth256BytesY,
				myPipe[k].SourcePixelFormat,
				myPipe[k].SurfaceTiling,
				myPipe[k].BytePerPixelY,
				myPipe[k].SourceRotation,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,
				GPUVMEnable,
				HostVMEnable,
				HostVMMaxNonCachedPageTableLevels,
				GPUVMMaxPageTableLevels,
				GPUVMMinPageSizeKBytes[k],
				HostVMMinPageSize,
				PTEBufferSizeInRequestsForLuma[k],
				myPipe[k].PitchY,
				myPipe[k].DCCMetaPitchY,
				myPipe[k].BlockWidthY,
				myPipe[k].BlockHeightY,

				/* Output */
				&MetaRowByteY[k],
				&PixelPTEBytesPerRowY[k],
				&dpte_row_width_luma_ub[k],
				&dpte_row_height_luma[k],
				&dpte_row_height_linear_luma[k],
				&PixelPTEBytesPerRowY_one_row_per_frame[k],
				&dpte_row_width_luma_ub_one_row_per_frame[k],
				&dpte_row_height_luma_one_row_per_frame[k],
				&meta_req_width[k],
				&meta_req_height[k],
				&meta_row_width[k],
				&meta_row_height[k],
				&PixelPTEReqWidthY[k],
				&PixelPTEReqHeightY[k],
				&PTERequestSizeY[k],
				&dpde0_bytes_per_frame_ub_l[k],
				&meta_pte_bytes_per_frame_ub_l[k]);

		/* Luma plane: prefetch source lines. */
		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
				myPipe[k].VRatio,
				myPipe[k].VTaps,
				myPipe[k].InterlaceEnable,
				myPipe[k].ProgressiveToInterlaceUnitInOPP,
				myPipe[k].SwathHeightY,
				myPipe[k].SourceRotation,
				myPipe[k].ViewportStationary,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,

				/* Output */
				&VInitPreFillY[k],
				&MaxNumSwathY[k]);

		/* Per-surface totals are luma + chroma. */
		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];

		/* Regular rows must fit within 64 * (request count) for both planes. */
		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
			PTEBufferSizeNotExceeded[k] = true;
		} else {
			PTEBufferSizeNotExceeded[k] = false;
		}

		/* The one-row-per-frame variant is checked against twice that limit. */
		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
			PTEBufferSizeInRequestsForLuma[k] &&
			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
	}

	dml32_CalculateMALLUseForStaticScreen(
			NumberOfActiveSurfaces,
			MALLAllocatedForDCN,
			UseMALLForStaticScreen,   // mode
			SurfaceSizeInMALL,
			one_row_per_frame_fits_in_buffer,
			/* Output */
			UsesMALLForStaticScreen); // boolean

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/*
		 * Same triggers as use_one_row_for_frame below, except that the page
		 * size test here does not depend on rotation.
		 */
		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64);
		/* log2 of the minimum page size in bytes, minus 12. */
		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
	}

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
#endif
		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));

		/* The flip variant is additionally disabled for full-frame MALL use. */
		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);

		/* Replace the regular row values with the one-row-per-frame ones. */
		if (use_one_row_for_frame[k]) {
			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
		}

		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
			DCCMetaBufferSizeNotExceeded[k] = true;
		else
			DCCMetaBufferSizeNotExceeded[k] = false;

		/* Reported total is halved in one-row-per-frame mode. */
		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
		if (use_one_row_for_frame[k])
			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;

		/* Row bandwidth uses the per-plane (un-halved) values and the line time. */
		dml32_CalculateRowBandwidth(
				GPUVMEnable,
				myPipe[k].SourcePixelFormat,
				myPipe[k].VRatio,
				myPipe[k].VRatioChroma,
				myPipe[k].DCCEnable,
				myPipe[k].HTotal / myPipe[k].PixelClock,
				MetaRowByteY[k], MetaRowByteC[k],
				meta_row_height[k],
				meta_row_height_chroma[k],
				PixelPTEBytesPerRowY[k],
				PixelPTEBytesPerRowC[k],
				dpte_row_height_luma[k],
				dpte_row_height_chroma[k],

				/* Output */
				&meta_row_bw[k],
				&dpte_row_bw[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
				__func__, k, use_one_row_for_frame_flip[k]);
		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
				__func__, k, UseMALLForPStateChange[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
				__func__, k, dpte_row_width_luma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
				__func__, k, dpte_row_height_chroma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
				__func__, k, dpte_row_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
				__func__, k, PTEBufferSizeNotExceeded[k]);
		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
#endif
	}
} // CalculateVMRowAndSwath

unsigned int dml32_CalculateVMAndRowBytes(
		bool ViewportStationary,
		bool DCCEnable,
		unsigned int NumberOfDPPs,
		unsigned int BlockHeight256Bytes,
		unsigned int BlockWidth256Bytes,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceTiling,
		unsigned int BytePerPixel,
		enum dm_rotation_angle SourceRotation,
		double SwathWidth,
		unsigned int ViewportHeight,
		unsigned int    ViewportXStart,
		unsigned int    ViewportYStart,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int GPUVMMinPageSizeKBytes,
		unsigned int HostVMMinPageSize,
		unsigned int PTEBufferSizeInRequests,
		unsigned int Pitch,
		unsigned int DCCMetaPitch,
		unsigned int MacroTileWidth,
		unsigned int MacroTileHeight,

		/* Output */
		unsigned int *MetaRowByte,
		unsigned int *PixelPTEBytesPerRow,
		unsigned int    *dpte_row_width_ub,
		unsigned int *dpte_row_height,
		unsigned int *dpte_row_height_linear,
		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
		unsigned int    *dpte_row_width_ub_one_row_per_frame,
		unsigned int    *dpte_row_height_one_row_per_frame,
		unsigned int *MetaRequestWidth,
		unsigned int *MetaRequestHeight,
		unsigned int *meta_row_width,
		unsigned int *meta_row_height,
		unsigned int *PixelPTEReqWidth,
		unsigned int *PixelPTEReqHeight,
		unsigned int *PTERequestSize,
		unsigned int    *DPDE0BytesFrame,
		unsigned int    *MetaPTEBytesFrame)
{
	unsigned int MPDEBytesFrame;
	unsigned int DCCMetaSurfaceBytes;
	unsigned int ExtraDPDEBytesFrame;
	unsigned int PDEAndMetaPTEBytesFrame;
	unsigned int HostVMDynamicLevels = 0;
	unsigned int    MacroTileSizeBytes;
	unsigned int    vp_height_meta_ub;
	unsigned int    vp_height_dpte_ub;
	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	*MetaRequestHeight = 8 * BlockHeight256Bytes;
	*MetaRequestWidth = 8 * BlockWidth256Bytes;
	if (SurfaceTiling == dm_sw_linear) {
		*meta_row_height = 32;
		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
				- dml_floor(ViewportXStart, *MetaRequestWidth);
	} else if (!IsVertical(SourceRotation)) {
		*meta_row_height = *MetaRequestHeight;
		if (ViewportStationary && NumberOfDPPs == 1) {
			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
		} else {
			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
		}
		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
	} else {
		*meta_row_height = *MetaRequestWidth;
		if (ViewportStationary && NumberOfDPPs == 1) {
			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
		} else {
			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
		}
		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
	}

	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
	} else if (!IsVertical(SourceRotation)) {
		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
	} else {
		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
	}

	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;

	if (GPUVMEnable == true) {
		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
				(8 * 4.0 * 1024), 1) + 1) * 64;
		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
	} else {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
	}

	if (DCCEnable != true) {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
		*MetaRowByte = 0;
	}

	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;

	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
					MacroTileHeight - 1, MacroTileHeight) -
					dml_floor(ViewportYStart, MacroTileHeight);
		} else if (!IsVertical(SourceRotation)) {
			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
		} else {
			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
		}
		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
				(8 * 2097152), 1) + 1);
		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
	} else {
		*DPDE0BytesFrame = 0;
		ExtraDPDEBytesFrame = 0;
		vp_height_dpte_ub = 0;
	}

	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
#endif

	if (HostVMEnable == true)
		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);

	if (SurfaceTiling == dm_sw_linear) {
		*PixelPTEReqHeight = 1;
		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
		*PTERequestSize = 64;
	} else if (GPUVMMinPageSizeKBytes == 4) {
		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
		*PTERequestSize = 128;
	} else {
		*PixelPTEReqHeight = MacroTileHeight;
		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
		*PTERequestSize = 64;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
#endif

	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
					(double) *PixelPTEReqWidth;
	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
			*PTERequestSize;

	if (SurfaceTiling == dm_sw_linear) {
		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
				*PixelPTEReqWidth / Pitch), 1));
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
						*PixelPTEReqWidth / Pitch), 1));
		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
#endif
		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;

		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
				PixelPTEReqWidth_linear / Pitch), 1);
		if (*dpte_row_height_linear > 128)
			*dpte_row_height_linear = 128;

	} else if (!IsVertical(SourceRotation)) {
		*dpte_row_height = *PixelPTEReqHeight;

		if (GPUVMMinPageSizeKBytes > 64) {
			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
					dml_floor(ViewportXStart, *PixelPTEReqWidth);
		} else {
			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
					*PixelPTEReqWidth;
		}

		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
	} else {
		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);

		if (ViewportStationary && (NumberOfDPPs == 1)) {
			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
		} else {
			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
					* *PixelPTEReqHeight;
		}

		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
	}

	if (GPUVMEnable != true)
		*PixelPTEBytesPerRow = 0;
	if (HostVMEnable == true)
		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
			__func__, *dpte_row_width_ub_one_row_per_frame);
	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
			*MetaPTEBytesFrame);
#endif

	return PDEAndMetaPTEBytesFrame;
} // CalculateVMAndRowBytes

/*
 * Compute the number of source lines to fetch during prefetch.
 *
 * Outputs:
 *   *VInitPreFill - floor((VRatio + VTaps + 1 [+ Interlace * 0.5 * VRatio]) / 2);
 *                   the interlace term is left out when
 *                   ProgressiveToInterlaceUnitInOPP is set.
 *   *MaxNumSwath  - number of whole swaths counted towards the result.
 *
 * Returns *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * SwathHeight is used as a divisor and as a modulus, so it must be non-zero.
 */
double dml32_CalculatePrefetchSourceLines(
		double VRatio,
		unsigned int VTaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		enum dm_rotation_angle SourceRotation,
		bool ViewportStationary,
		double SwathWidth,
		unsigned int ViewportHeight,
		unsigned int ViewportXStart,
		unsigned int ViewportYStart,

		/* Output */
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{

	/*
	 * Only derived for a stationary viewport; starts at 0 so the debug
	 * trace at the end never reads an indeterminate value.
	 */
	unsigned int vp_start_rot = 0;
	unsigned int sw0_tmp;
	unsigned int MaxPartialSwath;
	double numLines;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
#endif
	if (ProgressiveToInterlaceUnitInOPP)
		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
	else
		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);

	if (ViewportStationary) {
		/* The start position used for swath alignment depends on the rotation. */
		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
			vp_start_rot = SwathHeight -
					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
			vp_start_rot = ViewportXStart;
		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
			vp_start_rot = SwathHeight -
					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
		} else {
			vp_start_rot = ViewportYStart;
		}
		/* Lines left in the swath that contains the start position. */
		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
		if (sw0_tmp < *VInitPreFill)
			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
		else
			*MaxNumSwath = 1;
		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
	} else {
		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
		if (*VInitPreFill > 1)
			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
		else
			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
	}
	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
	/* *VInitPreFill is a double, so it is printed with %f. */
	dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
#endif
	return numLines;

} // CalculatePrefetchSourceLines

/*
 * Decide, per surface, whether MALL is used for static screen.
 *
 * Surfaces whose mode is dm_use_mall_static_screen_enable are always marked.
 * After that, surfaces that are not disabled, whose
 * one_row_per_frame_fits_in_buffer[] entry is set and that still fit in
 * MALLAllocatedForDCNFinal * 1024 * 1024 are added one at a time, smallest
 * SurfaceSizeInMALL[] first, until no further surface qualifies.
 *
 * The result is written to UsesMALLForStaticScreen[].
 */
void dml32_CalculateMALLUseForStaticScreen(
		unsigned int NumberOfActiveSurfaces,
		unsigned int MALLAllocatedForDCNFinal,
		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
		unsigned int SurfaceSizeInMALL[],
		bool one_row_per_frame_fits_in_buffer[],

		/* output */
		bool UsesMALLForStaticScreen[])
{
	unsigned int k;
	unsigned int Candidate = 0;
	unsigned int MALLSizeInUse = 0;
	bool FoundCandidate;

	/* Pass 1: surfaces that explicitly request MALL. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
		if (UsesMALLForStaticScreen[k])
			MALLSizeInUse = MALLSizeInUse + SurfaceSizeInMALL[k];
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, MALLSizeInUse);
#endif
	}

	/* Pass 2: repeatedly add the smallest remaining surface that qualifies. */
	do {
		FoundCandidate = false;

		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			if (MALLSizeInUse + SurfaceSizeInMALL[k] > MALLAllocatedForDCNFinal * 1024 * 1024)
				continue;
			if (UsesMALLForStaticScreen[k])
				continue;
			if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_disable)
				continue;
			if (!one_row_per_frame_fits_in_buffer[k])
				continue;
			/* Keep the current pick unless this surface is strictly smaller. */
			if (FoundCandidate && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[Candidate])
				continue;

			FoundCandidate = true;
			Candidate = k;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
					__func__, k, UseMALLForStaticScreen[k]);
#endif
		}

		if (FoundCandidate) {
			UsesMALLForStaticScreen[Candidate] = true;
			MALLSizeInUse = MALLSizeInUse + SurfaceSizeInMALL[Candidate];

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, Candidate);
			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, MALLSizeInUse);
#endif
		}
	} while (FoundCandidate);
}

/*
 * Compute the meta row and DPTE row bandwidth for one surface.
 *
 * *meta_row_bw is 0 when DCCEnable is false and *dpte_row_bw is 0 when
 * GPUVMEnable is false. Otherwise each is
 * VRatio * bytes_per_row / (row_height * LineTime) for luma, with the same
 * term for chroma (using VRatioChroma) added for the dm_420_8, dm_420_10,
 * dm_420_12 and dm_rgbe_alpha source formats.
 */
void dml32_CalculateRowBandwidth(
		bool GPUVMEnable,
		enum source_format_class SourcePixelFormat,
		double VRatio,
		double VRatioChroma,
		bool DCCEnable,
		double LineTime,
		unsigned int MetaRowByteLuma,
		unsigned int MetaRowByteChroma,
		unsigned int meta_row_height_luma,
		unsigned int meta_row_height_chroma,
		unsigned int PixelPTEBytesPerRowLuma,
		unsigned int PixelPTEBytesPerRowChroma,
		unsigned int dpte_row_height_luma,
		unsigned int dpte_row_height_chroma,
		/* Output */
		double *meta_row_bw,
		double *dpte_row_bw)
{
	/* Formats for which the chroma term is added to the luma term. */
	const bool AddChromaTerm = SourcePixelFormat == dm_420_8 ||
			SourcePixelFormat == dm_420_10 ||
			SourcePixelFormat == dm_420_12 ||
			SourcePixelFormat == dm_rgbe_alpha;
	double MetaBW = 0;
	double DPTEBW = 0;

	if (DCCEnable == true) {
		MetaBW = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
		if (AddChromaTerm)
			MetaBW = MetaBW + VRatioChroma * MetaRowByteChroma /
					(meta_row_height_chroma * LineTime);
	}

	if (GPUVMEnable == true) {
		DPTEBW = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
		if (AddChromaTerm)
			DPTEBW = DPTEBW + VRatioChroma * PixelPTEBytesPerRowChroma /
					(dpte_row_height_chroma * LineTime);
	}

	*meta_row_bw = MetaBW;
	*dpte_row_bw = DPTEBW;
}

/*
 * Return the largest of the three urgent latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCaseLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCaseLatency;

	return WorstCaseLatency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}

/*
 * Turn the time a buffer can cover into an urgent burst factor.
 *
 * When BufferSizeInTime does not exceed UrgentLatency,
 * *NotEnoughUrgentLatencyHiding is set and 0 is returned; otherwise the
 * result is BufferSizeInTime / (BufferSizeInTime - UrgentLatency).
 */
static double dml32_BurstFactorForBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		bool *NotEnoughUrgentLatencyHiding)
{
	double Margin = BufferSizeInTime - UrgentLatency;

	if (Margin <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		return 0;
	}
	return BufferSizeInTime / Margin;
}

/*
 * Compute the urgent burst factors for the cursor, luma and chroma buffers.
 *
 * *NotEnoughUrgentLatencyHiding is cleared first and set if any evaluated
 * buffer cannot cover UrgentLatency. A factor is 1 when VRatio is not
 * positive.
 *
 * *UrgentBurstFactorCursor is only written when CursorWidth > 0 and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0.
 *
 * For dm_use_mall_pstate_change_phantom_pipe a size of 1024 * 1024 is used
 * in place of DETBufferSizeY / DETBufferSizeC.
 */
void dml32_CalculateUrgentBurstFactor(
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
		unsigned int    swath_width_luma_ub,
		unsigned int    swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double  LineTime,
		double  UrgentLatency,
		double  CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double  VRatio,
		double  VRatioC,
		double  BytePerPixelInDETY,
		double  BytePerPixelInDETC,
		unsigned int    DETBufferSizeY,
		unsigned int    DETBufferSizeC,
		/* Output */
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool   *NotEnoughUrgentLatencyHiding)
{
	const bool PhantomPipe = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe);
	unsigned int CursorLines;
	double LumaLines;
	double ChromaLines;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Largest power of two not above the computed line count. */
		CursorLines = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
				(CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0)
			*UrgentBurstFactorCursor = dml32_BurstFactorForBufferTime(
					CursorLines * LineTime / VRatio,
					UrgentLatency, NotEnoughUrgentLatencyHiding);
		else
			*UrgentBurstFactorCursor = 1;
	}

	LumaLines = (PhantomPipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;

	if (VRatio > 0)
		*UrgentBurstFactorLuma = dml32_BurstFactorForBufferTime(
				dml_floor(LumaLines, SwathHeightY) * LineTime / VRatio,
				UrgentLatency, NotEnoughUrgentLatencyHiding);
	else
		*UrgentBurstFactorLuma = 1;

	if (BytePerPixelInDETC > 0) {
		ChromaLines = (PhantomPipe ? 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
				/ swath_width_chroma_ub;

		/*
		 * NOTE(review): the chroma path tests and divides by VRatio, and
		 * VRatioC is never read in this function - confirm whether
		 * VRatioC was intended here.
		 */
		if (VRatio > 0)
			*UrgentBurstFactorChroma = dml32_BurstFactorForBufferTime(
					dml_floor(ChromaLines, SwathHeightC) * LineTime / VRatio,
					UrgentLatency, NotEnoughUrgentLatencyHiding);
		else
			*UrgentBurstFactorChroma = 1;
	}
} // CalculateUrgentBurstFactor

/*
 * Compute the DCFCLK deep sleep value over all active surfaces.
 *
 * Each surface contributes a value derived from its swath width, bytes per
 * pixel and line delivery time, floored at PixelClock[k] / 16. The result
 * written to *DCFClkDeepSleep is the maximum of 8.0,
 * __DML_MIN_DCFCLK_FACTOR__ * (sum of read bandwidths) / ReturnBusWidth and
 * every per-surface value.
 */
void dml32_CalculateDCFCLKDeepSleep(
		unsigned int NumberOfActiveSurfaces,
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		double VRatio[],
		double VRatioChroma[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerSurface[],
		double HRatio[],
		double HRatioChroma[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double Dppclk[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		unsigned int ReturnBusWidth,

		/* Output */
		double *DCFClkDeepSleep)
{
	unsigned int k;
	double LumaDeliveryTime;
	double ChromaDeliveryTime;
	double PerSurfaceClk[DC__NUM_DPP__MAX];
	double TotalReadBandwidth = 0.0;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

		if (VRatio[k] <= 1)
			LumaDeliveryTime = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
					/ PixelClock[k];
		else
			LumaDeliveryTime = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		if (BytePerPixelC[k] == 0) {
			/* No chroma term: luma only, with a divisor of 64. */
			PerSurfaceClk[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
					64.0 / LumaDeliveryTime;
		} else {
			if (VRatioChroma[k] <= 1)
				ChromaDeliveryTime = SwathWidthC[k] *
						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
			else
				ChromaDeliveryTime = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
						/ Dppclk[k];

			/* Larger of the luma and chroma terms, each with a divisor of 32. */
			PerSurfaceClk[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
					BytePerPixelY[k] / 32.0 / LumaDeliveryTime,
					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
					32.0 / ChromaDeliveryTime);
		}
		PerSurfaceClk[k] = dml_max(PerSurfaceClk[k], PixelClock[k] / 16);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, PerSurfaceClk[k]);
#endif
	}

	for (k = 0; k < NumberOfActiveSurfaces; ++k)
		TotalReadBandwidth = TotalReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];

	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * TotalReadBandwidth / (double) ReturnBusWidth);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, TotalReadBandwidth);
	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
#endif

	/* Raise the result to the largest per-surface requirement. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k)
		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, PerSurfaceClk[k]);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
#endif
} // CalculateDCFCLKDeepSleep

/*
 * Compute the writeback delay from the writeback scaling parameters.
 *
 * Returns 0 when the unclamped last-output-line count is negative; otherwise
 * that count times the line length, plus (HTotal - WritebackDestinationWidth)
 * and a constant 80. WritebackPixelFormat is not read.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int         WritebackDestinationWidth,
		unsigned int         WritebackDestinationHeight,
		unsigned int         WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const double LastOutputLines = WritebackDestinationHeight - 1 -
			dml_ceil(((double)WritebackSourceHeight -
					(double) VInit) / (double)WritebackVRatio, 1.0);

	if (LastOutputLines < 0)
		return 0;

	return LastOutputLines * LineLength +
			(HTotal - WritebackDestinationWidth) + 80;
}

/*
 * Compute, for every voltage state i and every j in {0, 1}, the DCFCLK to use:
 *
 *   DCFCLKState[i][j] = min(DCFCLKPerState[i],
 *                           1.05 * max(average-bandwidth requirement,
 *                                      peak-bandwidth requirement))
 *
 * The average requirement is the larger of ProjectedDCFClkDeepSleep[i][j] and
 * the DPTE row bandwidth scaled by NormalEfficiency and ReturnBusWidth. The
 * peak requirement is summed over the active surfaces from their prefetch
 * needs and falls back to DCFCLKPerState[i] whenever the available time for a
 * surface is not positive.
 *
 * All per-surface loops run over k < NumberOfActiveSurfaces, so a count of 0
 * touches no per-surface array.
 */
void dml32_UseMinimumDCFCLK(
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		bool DRRDisplay[],
		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
		unsigned int MaxInterDCNTileRepeaters,
		unsigned int MaxPrefetchMode,
		double DRAMClockChangeLatencyFinal,
		double FCLKChangeLatency,
		double SREnterPlusExitTime,
		unsigned int ReturnBusWidth,
		unsigned int RoundTripPingLatencyCycles,
		unsigned int ReorderingBytes,
		unsigned int PixelChunkSizeInKByte,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		bool DynamicMetadataVMEnabled,
		bool ImmediateFlipRequirement,
		bool ProgressiveToInterlaceUnitInOPP,
		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
		unsigned int VTotal[],
		unsigned int VActive[],
		unsigned int DynamicMetadataTransmittedBytes[],
		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
		bool Interlace[],
		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
		double RequiredDISPCLK[][2],
		double UrgLatency[],
		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
		double ProjectedDCFClkDeepSleep[][2],
		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
		unsigned int TotalNumberOfActiveDPP[][2],
		unsigned int TotalNumberOfDCCActiveDPP[][2],
		unsigned int dpte_group_bytes[],
		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		unsigned int HTotal[],
		double PixelClock[],
		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
		bool DynamicMetadataEnable[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double DCFCLKPerState[],
		/* Output */
		double DCFCLKState[][2])
{
	unsigned int i, j, k;
	unsigned int     dummy1;
	double dummy2, dummy3;
	double   NormalEfficiency;
	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];

	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
		for  (j = 0; j <= 1; ++j) {
			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
			double MinimumTWait = 0.0;
			double DPTEBandwidth;
			double DCFCLKRequiredForAverageBandwidth;
			unsigned int ExtraLatencyBytes;
			double ExtraLatencyCycles;
			double DCFCLKRequiredForPeakBandwidth;
			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
			double MinimumTvmPlus2Tr0;

			/* Sum the DPTE row bandwidth of every surface for this state. */
			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
								/ (15.75 * HTotal[k] / PixelClock[k]);
			}

			/*
			 * "k < N" rather than "k <= N - 1": with unsigned operands the
			 * latter wraps to UINT_MAX when N is 0 and overruns the arrays.
			 */
			for (k = 0; k < NumberOfActiveSurfaces; ++k)
				NoOfDPPState[k] = NoOfDPP[i][j][k];

			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);

			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
					HostVMMaxNonCachedPageTableLevels);
			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
				double DCFCLKCyclesRequiredInPrefetch;
				double PrefetchTime;

				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
								* BytePerPixelC[k]) / NormalEfficiency
						/ ReturnBusWidth;
				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
								/ NormalEfficiency / ReturnBusWidth
								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
								/ ReturnBusWidth
						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
						* HTotal[k] / PixelClock[k];
				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
						UrgLatency[i] * GPUVMMaxPageTableLevels *
						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;

				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
						UseMALLForPStateChange[k],
						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
						DRRDisplay[k],
						DRAMClockChangeLatencyFinal,
						FCLKChangeLatency,
						UrgLatency[i],
						SREnterPlusExitTime);

				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
						MinimumTWait - UrgLatency[i] *
						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
						DynamicMetadataVMExtraLatency[k];

				if (PrefetchTime > 0) {
					double ExpectedVRatioPrefetch;

					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
							PixelDCFCLKCyclesRequiredInPrefetch[k] /
							DCFCLKCyclesRequiredInPrefetch);
					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
							PixelDCFCLKCyclesRequiredInPrefetch[k] /
							PrefetchPixelLinesTime[k] *
							dml_max(1.0, ExpectedVRatioPrefetch) *
							dml_max(1.0, ExpectedVRatioPrefetch / 4);
					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
								NormalEfficiency / ReturnBusWidth;
					}
				} else {
					/* No time left for prefetch: require the full state clock. */
					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
				}
				if (DynamicMetadataEnable[k] == true) {
					double TSetupPipe;
					double TdmbfPipe;
					double TdmsksPipe;
					double TdmecPipe;
					double AllowedTimeForUrgentExtraLatency;

					dml32_CalculateVUpdateAndDynamicMetadataParameters(
							MaxInterDCNTileRepeaters,
							RequiredDPPCLKPerSurface[i][j][k],
							RequiredDISPCLK[i][j],
							ProjectedDCFClkDeepSleep[i][j],
							PixelClock[k],
							HTotal[k],
							VTotal[k] - VActive[k],
							DynamicMetadataTransmittedBytes[k],
							DynamicMetadataLinesBeforeActiveRequired[k],
							Interlace[k],
							ProgressiveToInterlaceUnitInOPP,

							/* output */
							&TSetupPipe,
							&TdmbfPipe,
							&TdmecPipe,
							&TdmsksPipe,
							&dummy1,
							&dummy2,
							&dummy3);
					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
					if (AllowedTimeForUrgentExtraLatency > 0)
						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
					else
						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
				}
			}
			/* Same loop-bound form as above, for the same reason. */
			DCFCLKRequiredForPeakBandwidth = 0;
			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
						DCFCLKRequiredForPeakBandwidthPerSurface[k];
			}
			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
			/*
			 * NOTE(review): MinimumTWait is overwritten per surface in the
			 * loop above, so the value left by the last surface is applied
			 * to every surface here - confirm this is intended.
			 */
			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
				double MaximumTvmPlus2Tr0PlusTsw;

				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
				} else {
					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
								MinimumTvmPlus2Tr0 -
								PrefetchPixelLinesTime[k] / 4),
							(2 * ExtraLatencyCycles +
								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
				}
			}
			/* Never exceed the clock of the voltage state itself. */
			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
		}
	}
}

/*
 * Compute the extra latency byte count.
 *
 * The base is ReorderingBytes plus the pixel and meta chunk bytes
 * ((TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 * TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024). When GPUVMEnable is set,
 * each surface adds NumberOfDPP[k] * dpte_group_bytes[k] *
 * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set;
 * then it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (not
 * below 0) depending on HostVMMinPageSize. The double total is returned as
 * unsigned int.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	unsigned int HostVMDynamicLevels = 0;
	unsigned int PerGroupMultiplier;
	double TotalBytes;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}
	PerGroupMultiplier = 1 + 8 * HostVMDynamicLevels;

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable)
		return TotalBytes;

	for (k = 0; k < NumberOfActiveSurfaces; ++k)
		TotalBytes += NumberOfDPP[k] * dpte_group_bytes[k] *
				PerGroupMultiplier * HostVMInefficiencyFactor;

	return TotalBytes;
}

/*
 * Compute the VUpdate / VReady pixel quantities and the dynamic metadata
 * timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix, *VReadyOffsetPix - derived from DCFClkDeepSleep, Dppclk
 *       and the repeater delay, multiplied by PixelClock and rounded up.
 *   *VUpdateOffsetPix - ceil(HTotal / 4).
 *   *TSetup - (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock.
 *   *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / Dispclk.
 *   *Tdmec  - HTotal / PixelClock.
 *   *Tdmsks - VBlank * HTotal / PixelClock / 2 when
 *       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *       DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *       halved when InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP
 *       is false.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they use %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}

/*
 * dml32_CalculateTWait - pick the TWait value for a prefetch mode.
 *
 * Every outcome is at least UrgentLatency; which other latencies take part
 * depends on PrefetchMode, UseMALLForPStateChange and the DRR flags:
 *
 *  - PrefetchMode == 0, UseMALLForPStateChange is none of full_frame /
 *    sub_viewport / phantom_pipe, and the display is not a DRR display with
 *    SynchronizeDRRDisplaysForUCLKPStateChangeFinal set:
 *      max(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency)
 *  - otherwise, phantom_pipe or PrefetchMode > 2:
 *      UrgentLatency
 *  - otherwise, PrefetchMode <= 1:
 *      max(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency)
 *  - otherwise (PrefetchMode == 2):
 *      max(SREnterPlusExitTime, UrgentLatency)
 *
 * The latencies are added and compared directly, so callers must supply them
 * in one common time unit; the return value is in that same unit.
 */
double dml32_CalculateTWait(
		unsigned int PrefetchMode,
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
		bool DRRDisplay,
		double DRAMClockChangeLatency,
		double FCLKChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	const bool phantom_pipe =
			UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe;
	const bool mall_mode_selected = phantom_pipe ||
			UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame ||
			UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport;
	const bool synced_drr_display =
			SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay;
	double wait_time;

	if (PrefetchMode == 0 && !mall_mode_selected && !synced_drr_display) {
		/* DRAM clock change latency is part of the bound. */
		wait_time = dml_max3(DRAMClockChangeLatency + UrgentLatency,
				SREnterPlusExitTime, UrgentLatency);
	} else if (phantom_pipe || PrefetchMode > 2) {
		/* Neither of the remaining latency-based bounds applies. */
		wait_time = UrgentLatency;
	} else if (PrefetchMode <= 1) {
		/* FCLK change latency is part of the bound. */
		wait_time = dml_max3(FCLKChangeLatency + UrgentLatency,
				SREnterPlusExitTime, UrgentLatency);
	} else {
		/* PrefetchMode == 2 */
		wait_time = dml_max(SREnterPlusExitTime, UrgentLatency);
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
	dml_print("DML::%s: TWait = %f\n", __func__, wait_time);
#endif
	return wait_time;
} // CalculateTWait

/*
 * dml32_get_return_bw_mbps - return bandwidth, in megabytes per second.
 *
 * Three ideal bandwidths are built from the clock inputs and the SoC bounding
 * box (SDP port, fabric, DRAM). Each one is scaled by its "after urgent"
 * percentage and the smallest scaled value is the result.
 *
 * @soc:          bounding box providing widths, channel count and percentages
 * @VoltageLevel: values below 2 select pct_ideal_dram_bw_after_urgent_strobe
 *                for the DRAM term
 * @HostVMEnable: selects the pixel-mixed-with-VM figure over the pixel-only one
 * @DCFCLK:       multiplied by soc->return_bus_width_bytes
 * @FabricClock:  multiplied by soc->fabric_datapath_to_dcn_data_return_bytes
 * @DRAMSpeed:    multiplied by channel count and channel width
 *
 * NOTE(review): the pixel-only and pixel-mixed-with-VM figures are built from
 * identical terms (both take pct_ideal_dram_sdp_bw_after_urgent_pixel_only),
 * so HostVMEnable does not currently change the returned value. Confirm
 * whether the mixed case was meant to use a different percentage.
 */
double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
		const int VoltageLevel,
		const bool HostVMEnable,
		const double DCFCLK,
		const double FabricClock,
		const double DRAMSpeed)
{
	const double ideal_sdp_bw = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
	const double ideal_fabric_bw = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
	const double ideal_dram_bw = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
	const double sdp_limit = ideal_sdp_bw * soc->pct_ideal_sdp_bw_after_urgent / 100;
	const double fabric_limit = ideal_fabric_bw * soc->pct_ideal_fabric_bw_after_urgent / 100;
	double dram_pct;
	double pixel_only_bw;
	double pixel_vm_mixed_bw;
	double return_bw;

	/* DRAM percentage depends only on the voltage level. */
	if (VoltageLevel < 2)
		dram_pct = soc->pct_ideal_dram_bw_after_urgent_strobe;
	else
		dram_pct = soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only;

	pixel_only_bw = dml_min3(sdp_limit, fabric_limit, ideal_dram_bw * dram_pct / 100);
	pixel_vm_mixed_bw = dml_min3(sdp_limit, fabric_limit, ideal_dram_bw * dram_pct / 100);

	return_bw = HostVMEnable ? pixel_vm_mixed_bw : pixel_only_bw;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, ideal_sdp_bw);
	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, ideal_fabric_bw);
	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, ideal_dram_bw);
	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, pixel_only_bw);
	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, pixel_vm_mixed_bw);
	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, return_bw);
#endif
	return return_bw;
}

/*
 * dml32_get_return_bw_mbps_vm_only - return bandwidth for VM data only,
 * in megabytes per second.
 *
 * The result is the minimum of three scaled bandwidths:
 *  - SDP port: soc->return_bus_width_bytes * DCFCLK
 *  - fabric:   FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
 *  - DRAM:     DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
 *
 * The DRAM term uses pct_ideal_dram_bw_after_urgent_strobe when
 * VoltageLevel < 2 and pct_ideal_dram_sdp_bw_after_urgent_vm_only otherwise.
 *
 * NOTE(review): the fabric term is scaled by pct_ideal_sdp_bw_after_urgent,
 * the same percentage as the SDP term, rather than by a fabric-specific
 * percentage. Confirm that this is intended.
 */
double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
		const int VoltageLevel,
		const double DCFCLK,
		const double FabricClock,
		const double DRAMSpeed)
{
	const double sdp_limit = soc->return_bus_width_bytes * DCFCLK
			* soc->pct_ideal_sdp_bw_after_urgent / 100.0;
	const double fabric_limit = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
			* soc->pct_ideal_sdp_bw_after_urgent / 100.0;
	double dram_pct;
	double dram_limit;
	double vm_only_bw;

	if (VoltageLevel < 2)
		dram_pct = soc->pct_ideal_dram_bw_after_urgent_strobe;
	else
		dram_pct = soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only;

	dram_limit = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
			* dram_pct / 100.0;

	vm_only_bw = dml_min3(sdp_limit, fabric_limit, dram_limit);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, vm_only_bw);
#endif
	return vm_only_bw;
}

/*
 * dml32_CalculateExtraLatency - extra latency from a fixed cycle count plus
 * the time to return a number of extra bytes.
 *
 * The byte count comes from dml32_CalculateExtraLatencyBytes(), which receives
 * every argument except RoundTripPingLatencyCycles, DCFCLK and ReturnBW
 * unchanged. The result is
 *
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *       + ExtraLatencyBytes / ReturnBW
 *
 * DCFCLK and ReturnBW are used as divisors without any check here, so callers
 * are expected to pass non-zero values.
 */
double dml32_CalculateExtraLatency(
		unsigned int RoundTripPingLatencyCycles,
		unsigned int ReorderingBytes,
		double DCFCLK,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	double extra_bytes = dml32_CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActiveSurfaces,
			NumberOfDPP,
			dpte_group_bytes,
			HostVMInefficiencyFactor,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);
	/* Fixed part: round-trip ping cycles plus the arb-to-return delay, at DCFCLK. */
	double cycle_latency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK;
	/* Variable part: time to move the extra bytes at the return bandwidth. */
	double total_latency = cycle_latency + extra_bytes / ReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, extra_bytes);
	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
	dml_print("DML::%s: ExtraLatency=%f\n", __func__, total_latency);
#endif

	return total_latency;
} // CalculateExtraLatency

bool dml32_CalculatePrefetchSchedule(
		struct vba_vars_st *v,
		unsigned int k,
		double HostVMInefficiencyFactor,
		DmlPipe *myPipe,
		unsigned int DSCDelay,
		unsigned int DPP_RECOUT_WIDTH,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		unsigned int VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		unsigned int VInitPreFillC,
		unsigned int MaxNumSwathC,
		unsigned int swath_width_luma_ub,
		unsigned int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		double TPreReq,
		bool ExtendPrefetchIfPossible,
		/* Output */
		double   *DSTXAfterScaler,
		double   *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool   *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		double *TSetup,
		unsigned int   *VUpdateOffsetPix,
		double   *VUpdateWidthPix,
		double   *VReadyOffsetPix)
{
	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
	bool MyError = false;
	unsigned int DPPCycles, DISPCLKCycles;
	double DSTTotalPixelsAfterScaler;
	double LineTime;
	double dst_y_prefetch_equ;
	double prefetch_bw_oto;
	double Tvm_oto;
	double Tr0_oto;
	double Tvm_oto_lines;
	double Tr0_oto_lines;
	double dst_y_prefetch_oto;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	double LinesForPrefetchBandwidth = 0;
	unsigned int HostVMDynamicLevelsTrips;
	double  trip_to_mem;
	double  Tvm_trips;
	double  Tr0_trips;
	double  Tvm_trips_rounded;
	double  Tr0_trips_rounded;
	double  Lsw_oto;
	double  Tpre_rounded;
	double  prefetch_bw_equ;
	double  Tvm_equ;
	double  Tr0_equ;
	double  Tdmbf;
	double  Tdmec;
	double  Tdmsks;
	double  prefetch_sw_bytes;
	double  bytes_pp;
	double  dep_bytes;
	unsigned int max_vratio_pre = v->MaxVRatioPre;
	double  min_Lsw;
	double  Tsw_est1 = 0;
	double  Tsw_est3 = 0;

	if (v->GPUVMEnable == true && v->HostVMEnable == true)
		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
	else
		HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
#endif
	dml32_CalculateVUpdateAndDynamicMetadataParameters(
			v->MaxInterDCNTileRepeaters,
			myPipe->Dppclk,
			myPipe->Dispclk,
			myPipe->DCFClkDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			v->DynamicMetadataTransmittedBytes[k],
			v->DynamicMetadataLinesBeforeActiveRequired[k],
			myPipe->InterlaceEnable,
			myPipe->ProgressiveToInterlaceUnitInOPP,
			TSetup,

			/* output */
			&Tdmbf,
			&Tdmec,
			&Tdmsks,
			VUpdateOffsetPix,
			VUpdateWidthPix,
			VReadyOffsetPix);

	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	if (v->DynamicMetadataVMEnabled == true)
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	else
		*Tdmdl = TWait + UrgentExtraLatency;

#ifdef __DML_VBA_ALLOW_DELTA__
	if (v->DynamicMetadataEnable[k] == false)
		*Tdmdl = 0.0;
#endif

	if (v->DynamicMetadataEnable[k] == true) {
		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
					__func__, Tdmbf);
			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
					__func__, Tdmsks);
			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
					__func__, *Tdmdl);
#endif
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);

	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;

	DISPCLKCycles = v->DISPCLKDelaySubtotal;

	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
		return true;

	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;

	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
					myPipe->HActive / 2 : 0)
			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
#endif

	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
#endif

	MyError = false;

	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);

	if (v->GPUVMEnable == true) {
		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		if (v->GPUVMMaxPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem *
					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
					4.0 * LineTime; // VBA_ERROR
			*Tno_bw = UrgentExtraLatency;
		} else {
			*Tno_bw = 0;
		}
	} else if (myPipe->DCCEnable == true) {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		*Tno_bw = 0;
	} else {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = LineTime / 2.0;
		*Tno_bw = 0;
	}
	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);

	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
			|| myPipe->SourcePixelFormat == dm_420_12) {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
	} else {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
	}

	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));

	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
	min_Lsw = dml_max(min_Lsw, 1.0);
	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;

	if (v->GPUVMEnable == true) {
		Tvm_oto = dml_max3(
				Tvm_trips,
				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
				LineTime / 4.0);
	} else
		Tvm_oto = LineTime / 4.0;

	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max4(
				Tr0_trips,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
				(LineTime - Tvm_oto)/2.0,
				LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
	} else
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;

	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);

	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif

	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
			__func__, VStartup * LineTime);
	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
			__func__, *DSTYAfterScaler);
#endif
	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);

	if (prefetch_sw_bytes < dep_bytes)
		prefetch_sw_bytes = 2 * dep_bytes;

	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	if (dst_y_prefetch_equ > 1 &&
			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
		double PrefetchBandwidth1;
		double PrefetchBandwidth2;
		double PrefetchBandwidth3;
		double PrefetchBandwidth4;

		if (Tpre_rounded - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
		} else
			PrefetchBandwidth1 = 0;

		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
		} else
			PrefetchBandwidth3 = 0;


		if (VStartup == MaxVStartup &&
				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
				LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
			PrefetchBandwidth4 = prefetch_sw_bytes /
					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		} else {
			PrefetchBandwidth4 = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
#endif
		{
			bool Case1OK;
			bool Case2OK;
			bool Case3OK;

			if (PrefetchBandwidth1 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
						>= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}

			if (PrefetchBandwidth2 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
						>= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}

			if (PrefetchBandwidth3 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
								Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}

			if (Case1OK)
				prefetch_bw_equ = PrefetchBandwidth1;
			else if (Case2OK)
				prefetch_bw_equ = PrefetchBandwidth2;
			else if (Case3OK)
				prefetch_bw_equ = PrefetchBandwidth3;
			else
				prefetch_bw_equ = PrefetchBandwidth4;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif

			if (prefetch_bw_equ > 0) {
				if (v->GPUVMEnable == true) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
							HostVMInefficiencyFactor / prefetch_bw_equ,
							Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
							(LineTime - Tvm_equ) / 2, LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
			}
		}

		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			if (dst_y_prefetch_oto * LineTime < TPreReq) {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			}
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
			/* Clamp to oto for bandwidth calculation */
			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
		} else {
			/* For mode programming we want to extend the prefetch as much as possible
			 * (up to oto, or as long as we can for equ) if we're not already applying
			 * the 60us prefetch requirement. This is to avoid intermittent underflow
			 * issues during prefetch.
			 *
			 * The prefetch extension is applied under the following scenarios:
			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
			 * 2. We're using subvp or drr methods of p-state switch, in which case we
			 *    we don't care if prefetch takes up more of the blanking time
			 *
			 * Mode programming typically chooses the smallest prefetch time possible
			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
			 * apply this prefetch extension when p-state in vblank is not required (UCLK
			 * p-states take up the most vblank time).
			 */
			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
				MyError = true;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
				TimeForFetchingMetaPTE = Tvm_equ;
				TimeForFetchingRowInVBlank = Tr0_equ;
				*PrefetchBandwidth = prefetch_bw_equ;
				/* Clamp to equ for bandwidth calculation */
				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
			}
		}

		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank =
				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;

		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
				__func__, *DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, *DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif

		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY =
							dml_max((double) PrefetchSourceLinesY /
									LinesToRequestPrefetchPixelData,
									(double) MaxNumSwathY * SwathHeightY /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
#endif
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC =
						dml_max(*VRatioPrefetchC,
							(double) MaxNumSwathC * SwathHeightC /
							(LinesToRequestPrefetchPixelData -
							(VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
#endif
			}

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
					/ LineTime;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
					__func__, *RequiredPrefetchPixDataBWLuma);
#endif
			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
					LinesToRequestPrefetchPixelData
					* myPipe->BytePerPixelC
					* swath_width_chroma_ub / LineTime;
		} else {
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
					__func__, LinesToRequestPrefetchPixelData);
#endif
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
			(double)LinesToRequestPrefetchPixelData * LineTime +
			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
				PixelPTEBytesPerRow);
#endif
	} else {
		MyError = true;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
				__func__, dst_y_prefetch_equ);
#endif
	}

	{
		double prefetch_vm_bw;
		double prefetch_row_bw;

		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
					(*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
#endif
		}

		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
					(*DestinationLinesToRequestRowInVBlank * LineTime);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
#endif
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
} // CalculatePrefetchSchedule

/*
 * Work out the immediate-flip schedule for one pipe.
 *
 * The pipe receives a share of BandwidthAvailableForImmediateFlip that is
 * proportional to the bytes it has to fetch for the flip relative to
 * TotImmediateFlipBytes.  From that share the time needed to fetch the
 * PDE/meta PTE data and one row of meta/DPTE data is derived, converted
 * to destination lines (quarter-line granularity) and turned into the
 * resulting flip bandwidth.
 *
 * Outputs:
 *   DestinationLinesToRequestVMInImmediateFlip  - lines for the VM fetch
 *   DestinationLinesToRequestRowInImmediateFlip - lines for the row fetch
 *   final_flip_bw                               - bandwidth implied by the
 *                                                 rounded line counts
 *   ImmediateFlipSupportedForPipe               - false when either line
 *       count reaches its limit (32 / 16) or the VM fetch plus two row
 *       fetches take longer than the shortest row time
 *
 * When TotImmediateFlipBytes is zero all numeric outputs are zero.
 */
void dml32_CalculateFlipSchedule(
		double HostVMInefficiencyFactor,
		double UrgentExtraLatency,
		double UrgentLatency,
		unsigned int GPUVMMaxPageTableLevels,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		bool GPUVMEnable,
		double HostVMMinPageSize,
		double PDEAndMetaPTEBytesPerFrame,
		double MetaRowBytes,
		double DPTEBytesPerRow,
		double BandwidthAvailableForImmediateFlip,
		unsigned int TotImmediateFlipBytes,
		enum source_format_class SourcePixelFormat,
		double LineTime,
		double VRatio,
		double VRatioChroma,
		double Tno_bw,
		bool DCCEnable,
		unsigned int dpte_row_height,
		unsigned int meta_row_height,
		unsigned int dpte_row_height_chroma,
		unsigned int meta_row_height_chroma,
		bool    use_one_row_for_frame_flip,

		/* Output */
		double *DestinationLinesToRequestVMInImmediateFlip,
		double *DestinationLinesToRequestRowInImmediateFlip,
		double *final_flip_bw,
		bool *ImmediateFlipSupportedForPipe)
{
	/* Formats for which the chroma row heights take part in the row time. */
	const bool two_plane_format = SourcePixelFormat == dm_420_8 ||
			SourcePixelFormat == dm_420_10 ||
			SourcePixelFormat == dm_rgbe_alpha;
	const bool dpte_rows_only = GPUVMEnable && !DCCEnable;
	const bool meta_rows_only = !GPUVMEnable && DCCEnable;
	/* Extra host-VM page walks only apply when both GPUVM and HostVM are on. */
	const unsigned int host_vm_trips =
			(GPUVMEnable && HostVMEnable) ? HostVMMaxNonCachedPageTableLevels : 0;
	double vm_fetch_time = 0;
	double row_fetch_time = 0;
	double row_time_budget = 0.0;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
#endif

	if (TotImmediateFlipBytes == 0) {
		/* Nothing to fetch for the flip: the fetch times stay at zero. */
		*DestinationLinesToRequestVMInImmediateFlip = 0;
		*DestinationLinesToRequestRowInImmediateFlip = 0;
		*final_flip_bw = 0;
	} else {
		/* Bytes this pipe contributes; the DPTE row counts twice in one-row mode. */
		const double flip_bytes = use_one_row_for_frame_flip ?
				PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow :
				PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow;
		const double flip_bw = flip_bytes * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
		const double row_bytes = MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor;

		if (GPUVMEnable) {
			vm_fetch_time = dml_max3(
					Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / flip_bw,
					UrgentExtraLatency + UrgentLatency *
					(GPUVMMaxPageTableLevels * (host_vm_trips + 1) - 1),
					LineTime / 4.0);
		}

		if (GPUVMEnable || DCCEnable) {
			row_fetch_time = dml_max3(row_bytes / flip_bw,
					UrgentLatency * (host_vm_trips + 1), LineTime / 4.0);
		}

		/* Round both fetch times up to a quarter of a line. */
		*DestinationLinesToRequestVMInImmediateFlip =
				dml_ceil(4.0 * (vm_fetch_time / LineTime), 1.0) / 4.0;
		*DestinationLinesToRequestRowInImmediateFlip =
				dml_ceil(4.0 * (row_fetch_time / LineTime), 1.0) / 4.0;

		if (GPUVMEnable) {
			*final_flip_bw = dml_max(
					PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
					row_bytes / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
		} else if (DCCEnable) {
			*final_flip_bw = row_bytes / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
		} else {
			*final_flip_bw = 0;
		}
	}

	/* Shortest time available to fetch a row, over the row kinds in use. */
	if (dpte_rows_only) {
		if (two_plane_format) {
			row_time_budget = dml_min(dpte_row_height * LineTime / VRatio,
					dpte_row_height_chroma * LineTime / VRatioChroma);
		} else {
			row_time_budget = dpte_row_height * LineTime / VRatio;
		}
	} else if (meta_rows_only) {
		if (two_plane_format) {
			row_time_budget = dml_min(meta_row_height * LineTime / VRatio,
					meta_row_height_chroma * LineTime / VRatioChroma);
		} else {
			row_time_budget = meta_row_height * LineTime / VRatio;
		}
	} else {
		if (two_plane_format) {
			row_time_budget = dml_min4(dpte_row_height * LineTime / VRatio,
					meta_row_height * LineTime / VRatio,
					dpte_row_height_chroma * LineTime / VRatioChroma,
					meta_row_height_chroma * LineTime / VRatioChroma);
		} else {
			row_time_budget = dml_min(dpte_row_height * LineTime / VRatio,
					meta_row_height * LineTime / VRatio);
		}
	}

	*ImmediateFlipSupportedForPipe = !(*DestinationLinesToRequestVMInImmediateFlip >= 32 ||
			*DestinationLinesToRequestRowInImmediateFlip >= 16 ||
			vm_fetch_time + 2 * row_fetch_time > row_time_budget);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
			__func__, *DestinationLinesToRequestVMInImmediateFlip);
	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
			__func__, *DestinationLinesToRequestRowInImmediateFlip);
	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, vm_fetch_time);
	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
			__func__, row_fetch_time);
	dml_print("DML::%s: min_row_time = %f\n", __func__, row_time_budget);
	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
#endif
} // CalculateFlipSchedule

/*
 * Fill in v->Watermark from the SOC latency parameters and decide which
 * clock-change schemes the current configuration can support.
 *
 * Steps, in order:
 *  1. Urgent, USR retraining, DRAM clock change, FCLK change and stutter
 *     watermarks are built from mmSOCParameters.
 *  2. Writeback urgent / DRAM clock change / FCLK change watermarks are
 *     built; with more than one active writeback an extra chunk transfer
 *     time is added, and the USR retraining latency is added to each of
 *     them when v->USRRetrainingRequiredFinal is set.
 *  3. For every active surface the latency that the line buffer and the
 *     DET can hide is computed and compared against the watermarks,
 *     giving the per-surface DRAM clock change, FCLK change and USR
 *     retraining margins.  Surfaces with writeback enabled are further
 *     limited by the matching writeback margin.
 *  4. Surfaces are pairwise classified as synchronized or not, and the
 *     margins are reduced to the FCLKChangeSupport, USRRetrainingSupport
 *     and DRAMClockChangeSupport results.
 *  5. SubViewportLinesNeededInMALL[] is computed per surface.
 *
 * Outputs are written through DRAMClockChangeSupport,
 * MaxActiveDRAMClockChangeLatencySupported[], SubViewportLinesNeededInMALL[],
 * FCLKChangeSupport, MinActiveFCLKChangeLatencySupported,
 * USRRetrainingSupport and ActiveDRAMClockChangeLatencyMargin[]; all
 * per-surface arrays are indexed 0 .. v->NumberOfActiveSurfaces - 1.
 */
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
		struct vba_vars_st *v,
		unsigned int PrefetchMode,
		double DCFCLK,
		double ReturnBW,
		SOCParametersList mmSOCParameters,
		double SOCCLK,
		double DCFClkDeepSleep,
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerSurface[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		double DSTXAfterScaler[],
		double DSTYAfterScaler[],
		bool UnboundedRequestEnabled,
		unsigned int CompressedBufferSizeInkByte,

		/* Output */
		enum clock_change_support *DRAMClockChangeSupport,
		double MaxActiveDRAMClockChangeLatencySupported[],
		unsigned int SubViewportLinesNeededInMALL[],
		enum dm_fclock_change_support *FCLKChangeSupport,
		double *MinActiveFCLKChangeLatencySupported,
		bool *USRRetrainingSupport,
		double ActiveDRAMClockChangeLatencyMargin[])
{
	unsigned int i, j, k;
	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
	unsigned int DRAMClockChangeSupportNumber = 0;
	unsigned int LastSurfaceWithoutMargin = 0;
	unsigned int DRAMClockChangeMethod = 0;
	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
	double MinActiveFCLKChangeMargin = 0.;
	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
	double ActiveClockChangeLatencyHidingY;
	double ActiveClockChangeLatencyHidingC;
	double ActiveClockChangeLatencyHiding;
	double EffectiveDETBufferSizeY;
	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
	double TotalPixelBW = 0.0;
	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
	double     EffectiveLBLatencyHidingY;
	double     EffectiveLBLatencyHidingC;
	double     LinesInDETY[DC__NUM_DPP__MAX];
	double     LinesInDETC[DC__NUM_DPP__MAX];
	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
	double     FullDETBufferingTimeY;
	double     FullDETBufferingTimeC;
	double     WritebackDRAMClockChangeLatencyMargin;
	double     WritebackFCLKChangeLatencyMargin;
	double     WritebackLatencyHiding;
	bool    SameTimingForFCLKChange;

	unsigned int    TotalActiveWriteback = 0;
	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];

	/* Read-path watermarks: each is a latency sum taken from the SOC parameters. */
	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
#endif


	TotalActiveWriteback = 0;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->WritebackEnable[k] == true)
			TotalActiveWriteback = TotalActiveWriteback + 1;
	}

	/* With several writebacks active, one chunk transfer at SOCCLK is added. */
	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}
	/*
	 * The retraining latency belongs on the urgent watermark computed just
	 * above.  Adding it to WritebackDRAMClockChangeWatermark here would be
	 * lost, because that watermark is unconditionally assigned below.
	 */
	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
#endif

	/* Total pixel bandwidth over all surfaces; used to split the compressed buffer. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
	}

	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {

		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);


#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
#endif

		/* Convert the line-buffer source lines into time at this surface's line rate. */
		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveDETBufferSizeY = DETBufferSizeY[k];

		if (UnboundedRequestEnabled) {
			/* Add this surface's luma share of the compressed buffer, by bandwidth. */
			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
					+ CompressedBufferSizeInkByte * 1024
							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
		}

		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];

		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];

		if (v->NumberOfActiveSurfaces > 1) {
			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
							/ v->PixelClock[k] / v->VRatio[k];
		}

		if (BytePerPixelDETC[k] > 0) {
			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
					/ v->VRatioChroma[k];
			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
							/ v->PixelClock[k];
			if (v->NumberOfActiveSurfaces > 1) {
				/*
				 * Floating-point on purpose, as in the luma path: the
				 * integer form 1 - 1 / N evaluates to 1 for every N > 1.
				 */
				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
						- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
								/ v->PixelClock[k] / v->VRatioChroma[k];
			}
			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
					ActiveClockChangeLatencyHidingC);
		} else {
			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
		}

		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.DRAMClockChangeWatermark;
		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.FCLKChangeWatermark;
		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;

		if (v->WritebackEnable[k]) {
			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
			if (v->WritebackPixelFormat[k] == dm_444_64)
				WritebackLatencyHiding = WritebackLatencyHiding / 2;

			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackDRAMClockChangeWatermark;

			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackFCLKChangeWatermark;

			/* Each margin is limited by the writeback margin of the same clock. */
			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
					WritebackDRAMClockChangeLatencyMargin);
			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
					WritebackFCLKChangeLatencyMargin);
		}
		MaxActiveDRAMClockChangeLatencySupported[k] =
				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
						0 :
						(ActiveDRAMClockChangeLatencyMargin[k]
								+ mmSOCParameters.DRAMClockChangeLatency);
	}

	/*
	 * Two surfaces count as synchronized when they are the same surface,
	 * share a BlendingAndTiming master, have identical timing while
	 * SynchronizeTimingsFinal is set, or either is a DRR display while
	 * SynchronizeDRRDisplaysForUCLKPStateChangeFinal is set.
	 */
	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
			if (i == j ||
					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
				SynchronizedSurfaces[i][j] = true;
			} else {
				SynchronizedSurfaces[i][j] = false;
			}
		}
	}

	/* Smallest FCLK margin over all non-phantom surfaces, and which surface has it. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
			SurfaceWithMinActiveFCLKChangeMargin = k;
		}
	}

	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;

	/* Smallest FCLK margin among surfaces not synchronized with the minimum one. */
	SameTimingForFCLKChange = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
					(SameTimingForFCLKChange ||
					ActiveFCLKChangeLatencyMargin[k] <
					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
			}
			SameTimingForFCLKChange = false;
		}
	}

	if (MinActiveFCLKChangeMargin > 0) {
		*FCLKChangeSupport = dm_fclock_change_vactive;
	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
			(PrefetchMode <= 1)) {
		*FCLKChangeSupport = dm_fclock_change_vblank;
	} else {
		*FCLKChangeSupport = dm_fclock_change_unsupported;
	}

	*USRRetrainingSupport = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(USRRetrainingLatencyMargin[k] < 0)) {
			*USRRetrainingSupport = false;
		}
	}

	/*
	 * DRAMClockChangeSupportNumber: 0 = every non-MALL surface has margin,
	 * 1 = surfaces without margin are all synchronized with each other,
	 * 2 = otherwise (also forced whenever PrefetchMode > 0).
	 */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
			if (PrefetchMode > 0) {
				DRAMClockChangeSupportNumber = 2;
			} else if (DRAMClockChangeSupportNumber == 0) {
				DRAMClockChangeSupportNumber = 1;
				LastSurfaceWithoutMargin = k;
			} else if (DRAMClockChangeSupportNumber == 1 &&
					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
				DRAMClockChangeSupportNumber = 2;
			}
		}
	}

	/* DRAMClockChangeMethod: 0 = no MALL, 1 = MALL full frame, 2 = MALL sub-viewport. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
			DRAMClockChangeMethod = 1;
		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
			DRAMClockChangeMethod = 2;
	}

	if (DRAMClockChangeMethod == 0) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else if (DRAMClockChangeMethod == 1) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	}

	/* Lines needed in MALL: p-state lines + lines buffered ahead + one meta row. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		unsigned int dst_y_pstate;
		unsigned int src_y_pstate_l;
		unsigned int src_y_pstate_c;
		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;

		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
		dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
		/* SwathWidthY[] holds doubles, so it must be printed with %f. */
		dml_print("DML::%s: k=%d, SwathWidthY                  = %f\n", __func__, k, SwathWidthY[k]);
		dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
		dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
		dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
		dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
		dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
		dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
		dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
#endif
		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;

		if (BytePerPixelDETC[k] > 0) {
			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
			dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
			dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
			dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
#endif
		}
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
			__func__, *MinActiveFCLKChangeLatencySupported);
	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
#endif
} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport

/*
 * Return the DISPCLK needed for writeback.
 *
 * Three candidate clocks are computed from the pixel clock, the scaler
 * taps/ratio, the source/destination widths, HTotal and the writeback line
 * buffer size.  The largest candidate is passed to
 * dml32_RoundToDFSGranularity() together with DISPCLKDPPCLKVCOSpeed and
 * the result is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used here.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int   WritebackSourceWidth,
		unsigned int   WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	/* Candidate driven by the horizontal taps and horizontal ratio. */
	const double clk_from_htaps =
			PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	/* Candidate driven by the vertical taps over one full line (HTotal). */
	const double clk_from_vtaps = PixelClock *
			(WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	/* Candidate that also accounts for the writeback line buffer size. */
	const double clk_from_line_buffer = PixelClock * WritebackVTaps *
			(WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) /
			6.0 / WritebackSourceWidth;
	const double required_clk = dml_max3(clk_from_htaps, clk_from_vtaps, clk_from_line_buffer);

	return dml32_RoundToDFSGranularity(required_clk, 1, DISPCLKDPPCLKVCOSpeed);
}

/*
 * Translate the vblank p-state/stutter policy into the inclusive range of
 * prefetch modes to try.
 *
 * The four specific policies pin both ends of the range to a single mode
 * (3, 2, 1 or 0); any other value yields the full range 0..3.
 * *MinPrefetchMode is written before *MaxPrefetchMode.
 */
void dml32_CalculateMinAndMaxPrefetchMode(
		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
		unsigned int             *MinPrefetchMode,
		unsigned int             *MaxPrefetchMode)
{
	unsigned int lowest_mode;
	unsigned int highest_mode;

	switch (AllowForPStateChangeOrStutterInVBlankFinal) {
	case dm_prefetch_support_none:
		lowest_mode = 3;
		highest_mode = 3;
		break;
	case dm_prefetch_support_stutter:
		lowest_mode = 2;
		highest_mode = 2;
		break;
	case dm_prefetch_support_fclk_and_stutter:
		lowest_mode = 1;
		highest_mode = 1;
		break;
	case dm_prefetch_support_uclk_fclk_and_stutter:
		lowest_mode = 0;
		highest_mode = 0;
		break;
	default:
		/* Any other policy: allow every prefetch mode. */
		lowest_mode = 0;
		highest_mode = 3;
		break;
	}

	*MinPrefetchMode = lowest_mode;
	*MaxPrefetchMode = highest_mode;
} // CalculateMinAndMaxPrefetchMode

void dml32_CalculatePixelDeliveryTimes(
		unsigned int             NumberOfActiveSurfaces,
		double              VRatio[],
		double              VRatioChroma[],
		double              VRatioPrefetchY[],
		double              VRatioPrefetchC[],
		unsigned int             swath_width_luma_ub[],
		unsigned int             swath_width_chroma_ub[],
		unsigned int             DPPPerSurface[],
		double              HRatio[],
		double              HRatioChroma[],
		double              PixelClock[],
		double              PSCL_THROUGHPUT[],
		double              PSCL_THROUGHPUT_CHROMA[],
		double              Dppclk[],
		unsigned int             BytePerPixelC[],
		enum dm_rotation_angle   SourceRotation[],
		unsigned int             NumberOfCursors[],
		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int             BlockWidth256BytesY[],
		unsigned int             BlockHeight256BytesY[],
		unsigned int             BlockWidth256BytesC[],
		unsigned int             BlockHeight256BytesC[],

		/* Output */
		double              DisplayPipeLineDeliveryTimeLuma[],
		double              DisplayPipeLineDeliveryTimeChroma[],
		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeLuma[],
		double              DisplayPipeRequestDeliveryTimeChroma[],
		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double              CursorRequestDeliveryTime[],
		double              CursorRequestDeliveryTimePrefetch[])
{
	double   req_per_swath_ub;
	unsigned int k;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		if (VRatio[k] <= 1) {
			DisplayPipeLineDeliveryTimeLuma[k] =
					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
		} else {
			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
		}

		if (BytePerPixelC[k] == 0) {
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
		} else {
			if (VRatioChroma[k] <= 1) {
				DisplayPipeLineDeliveryTimeChroma[k] =
					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
			} else {
				DisplayPipeLineDeliveryTimeChroma[k] =
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
			}
		}

		if (VRatioPrefetchY[k] <= 1) {
			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
		} else {
			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
					swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
		}

		if (BytePerPixelC[k] == 0) {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		} else {
			if (VRatioPrefetchC[k] <= 1) {
				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
			} else {
				DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
						swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
			}
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (!IsVertical(SourceRotation[k]))
			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
		else
			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
		if (BytePerPixelC[k] == 0) {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		} else {
			if (!IsVertical(SourceRotation[k]))
				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
			else
				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int cursor_req_per_width;

		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
				256.0 / 8.0, 1.0);
		if (NumberOfCursors[k] > 0) {
			if (VRatio[k] <= 1) {
				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
						HRatio[k] / PixelClock[k] / cursor_req_per_width;
			} else {
				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
			}
			if (VRatioPrefetchY[k] <= 1) {
				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
						HRatio[k] / PixelClock[k] / cursor_req_per_width;
			} else {
				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
			}
		} else {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} // CalculatePixelDeliveryTimes

/*
 * dml32_CalculateMetaAndPTETimes - per-surface meta-chunk and PTE-group timings.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) the function writes:
 *
 *  - DST_Y_PER_PTE_ROW_NOM_{L,C}[k] and DST_Y_PER_META_ROW_NOM_{L,C}[k]:
 *    the PTE / meta row height divided by the matching vertical ratio.
 *    The chroma entries are 0 when BytePerPixelC[k] == 0.
 *
 *  - TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}[k]:
 *    lines * HTotal[k] / PixelClock[k] / (upper bound of meta chunks per row),
 *    where "lines" is the nominal meta row value, or the VBlank /
 *    immediate-flip DestinationLinesToRequestRow* input.  All six are 0 when
 *    DCCEnable[k] is false; the chroma three are 0 when BytePerPixelC[k] == 0.
 *
 *  - time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[k]:
 *    the same construction divided by an upper bound of PTE groups per row.
 *    All six are 0 when GPUVMEnable is false; the chroma three are 0 when
 *    BytePerPixelC[k] == 0.
 *
 * Width-type inputs are swapped for their height counterparts when
 * IsVertical(SourceRotation[k]) holds.  Nothing is returned.
 */
void dml32_CalculateMetaAndPTETimes(
		bool use_one_row_for_frame[],
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int MetaChunkSize,
		unsigned int MinMetaChunkSizeBytes,
		unsigned int    HTotal[],
		double  VRatio[],
		double  VRatioChroma[],
		double  DestinationLinesToRequestRowInVBlank[],
		double  DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double  PixelClock[],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int dpte_row_height[],
		unsigned int dpte_row_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int dpte_group_bytes[],
		unsigned int    PTERequestSizeY[],
		unsigned int    PTERequestSizeC[],
		unsigned int    PixelPTEReqWidthY[],
		unsigned int    PixelPTEReqHeightY[],
		unsigned int    PixelPTEReqWidthC[],
		unsigned int    PixelPTEReqHeightC[],
		unsigned int    dpte_row_width_luma_ub[],
		unsigned int    dpte_row_width_chroma_ub[],

		/* Output */
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	unsigned int k;

	/* Pass 1: nominal destination lines per PTE row and per meta row. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const bool chroma_present = BytePerPixelC[k] != 0;

		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		DST_Y_PER_PTE_ROW_NOM_C[k] = chroma_present ?
				dpte_row_height_chroma[k] / VRatioChroma[k] : 0;
		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		DST_Y_PER_META_ROW_NOM_C[k] = chroma_present ?
				meta_row_height_chroma[k] / VRatioChroma[k] : 0;
	}

	/* Pass 2: time per DCC meta chunk (luma first, then chroma). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Scratch values; filled for luma, then reused for chroma. */
		unsigned int chunk_width;
		unsigned int min_chunk_width;
		unsigned int whole_chunks;
		unsigned int leftover;
		unsigned int req_extent;
		unsigned int threshold;
		unsigned int chunks_ub;

		if (!DCCEnable[k]) {
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
		whole_chunks = meta_row_width[k] / chunk_width;
		leftover = meta_row_width[k] % chunk_width;
		req_extent = IsVertical(SourceRotation[k]) ? meta_req_height[k] : meta_req_width[k];
		threshold = 2 * min_chunk_width - req_extent;

		/* One extra chunk if the remainder fits the threshold, two otherwise. */
		chunks_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;

		if (BytePerPixelC[k] == 0) {
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		/* The chroma quotient is formed in double and truncated on assignment. */
		whole_chunks = (double) meta_row_width_chroma[k] / chunk_width;
		leftover = meta_row_width_chroma[k] % chunk_width;
		req_extent = IsVertical(SourceRotation[k]) ?
				meta_req_height_chroma[k] : meta_req_width_chroma[k];
		threshold = 2 * min_chunk_width - req_extent;
		chunks_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_ub;
	}

	/* Pass 3: time per PTE group (luma first, then chroma). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Scratch values; filled for luma, then reused for chroma. */
		unsigned int pte_req_extent;
		unsigned int group_width;
		unsigned int groups_ub;
		double row_in_groups;

		if (!GPUVMEnable) {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
		} else {
			pte_req_extent = IsVertical(SourceRotation[k]) ?
					PixelPTEReqHeightY[k] : PixelPTEReqWidthY[k];
			group_width = (double) dpte_group_bytes[k] /
					(double) PTERequestSizeY[k] * pte_req_extent;

			/* The row-to-group ratio is halved when one row covers the frame. */
			row_in_groups = (double) dpte_row_width_luma_ub[k] / (double) group_width;
			if (use_one_row_for_frame[k])
				row_in_groups = row_in_groups / 2.0;
			groups_ub = dml_ceil(row_in_groups, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
					__func__, k, use_one_row_for_frame[k]);
			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
					__func__, k, dpte_group_bytes[k]);
			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
					__func__, k, PTERequestSizeY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
					__func__, k, PixelPTEReqWidthY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
					__func__, k, PixelPTEReqHeightY[k]);
			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
					__func__, k, dpte_row_width_luma_ub[k]);
			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
					__func__, k, group_width);
			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
					__func__, k, groups_ub);
#endif

			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
					HTotal[k] / PixelClock[k] / groups_ub;
			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
					HTotal[k] / PixelClock[k] / groups_ub;
			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / groups_ub;

			if (BytePerPixelC[k] == 0) {
				time_per_pte_group_nom_chroma[k] = 0;
				time_per_pte_group_vblank_chroma[k] = 0;
				time_per_pte_group_flip_chroma[k] = 0;
			} else {
				pte_req_extent = IsVertical(SourceRotation[k]) ?
						PixelPTEReqHeightC[k] : PixelPTEReqWidthC[k];
				group_width = (double) dpte_group_bytes[k] /
						(double) PTERequestSizeC[k] * pte_req_extent;

				row_in_groups = (double) dpte_row_width_chroma_ub[k] /
						(double) group_width;
				if (use_one_row_for_frame[k])
					row_in_groups = row_in_groups / 2.0;
				groups_ub = dml_ceil(row_in_groups, 1.0);
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
						__func__, k, dpte_row_width_chroma_ub[k]);
				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
						__func__, k, group_width);
				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
						__func__, k, groups_ub);
#endif
				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
						HTotal[k] / PixelClock[k] / groups_ub;
				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
						HTotal[k] / PixelClock[k] / groups_ub;
				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
						HTotal[k] / PixelClock[k] / groups_ub;
			}
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
				__func__, k, TimePerMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
				__func__, k, TimePerMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
				__func__, k, TimePerMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
				__func__, k, TimePerChromaMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
				__func__, k, TimePerChromaMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
				__func__, k, TimePerChromaMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
				__func__, k, time_per_pte_group_nom_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
				__func__, k, time_per_pte_group_vblank_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
				__func__, k, time_per_pte_group_flip_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
				__func__, k, time_per_pte_group_nom_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
				__func__, k, time_per_pte_group_vblank_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
				__func__, k, time_per_pte_group_flip_chroma[k]);
#endif
	}
} // CalculateMetaAndPTETimes

/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface VM group/request timings.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) the four outputs are 0
 * unless GPUVMEnable is set and (DCCEnable[k] or GPUVMMaxPageTableLevels > 1).
 * Otherwise two divisors are built from the per-frame byte counts:
 *
 *   groups   = sum of dml_ceil(bytes / vm_group_bytes[k], 1.0)
 *   requests = sum of bytes / 64            (integer division)
 *
 * Which byte counts contribute:
 *   DCC off                      -> dpde0 only
 *   DCC on, page-table levels 1  -> meta PTE only
 *   DCC on, any other level count-> dpde0 and meta PTE, and "groups" gets an
 *                                   extra 1 (2 when chroma is present)
 * The chroma (_c) byte counts are included only when BytePerPixelC[k] > 0.
 * NOTE(review): the rationale for the extra 1/2 is not visible in this file.
 *
 * Each output is DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] *
 * HTotal[k] / PixelClock[k] divided by "groups" or "requests", then halved
 * when GPUVMMaxPageTableLevels > 2.  Nothing is returned.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const bool dcc_on = DCCEnable[k];
		const bool chroma_present = BytePerPixelC[k] > 0;
		/* dpde0 bytes count unless DCC is on with exactly one page-table level. */
		const bool count_dpde0 = !dcc_on || GPUVMMaxPageTableLevels != 1;
		/* Meta PTE bytes count whenever DCC is on. */
		const bool count_meta_pte = dcc_on;
		double group_sum;
		unsigned int num_groups;
		unsigned int num_requests;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable && (dcc_on || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			/*
			 * Terms are accumulated in the order: constant, dpde0 luma,
			 * dpde0 chroma, meta luma, meta chroma.
			 */
			group_sum = 0;
			num_requests = 0;

			if (count_dpde0 && count_meta_pte)
				group_sum = chroma_present ? 2 : 1;

			if (count_dpde0) {
				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				num_requests += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (chroma_present) {
					group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					num_requests += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (count_meta_pte) {
				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				num_requests += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (chroma_present) {
					group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					num_requests += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			/* The group total is used as an unsigned integer divisor. */
			num_groups = group_sum;

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_groups;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_groups;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_requests;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_requests;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes

void dml32_CalculateDCCConfiguration(
		bool             DCCEnabled,
		bool             DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int             SurfaceWidthLuma,
		unsigned int             SurfaceWidthChroma,
		unsigned int             SurfaceHeightLuma,
		unsigned int             SurfaceHeightChroma,
		unsigned int                nomDETInKByte,
		unsigned int             RequestHeight256ByteLuma,
		unsigned int             RequestHeight256ByteChroma,
		enum dm_swizzle_mode     TilingFormat,
		unsigned int             BytePerPixelY,
		unsigned int             BytePerPixelC,
		double              BytePerPixelDETY,
		double              BytePerPixelDETC,
		enum dm_rotation_angle   SourceRotation,
		/* Output */
		unsigned int        *MaxUncompressedBlockLuma,
		unsigned int        *MaxUncompressedBlockChroma,
		unsigned int        *MaxCompressedBlockLuma,
		unsigned int        *MaxCompressedBlockChroma,
		unsigned int        *IndependentBlockLuma,
		unsigned int        *IndependentBlockChroma)
{
	typedef enum {
		REQ_256Bytes,
		REQ_128BytesNonContiguous,
		REQ_128BytesContiguous,
		REQ_NA
	} RequestType;

	RequestType   RequestLuma;
	RequestType   RequestChroma;

	unsigned int   segment_order_horz_contiguous_luma;
	unsigned int   segment_order_horz_contiguous_chroma;
	unsigned int   segment_order_vert_contiguous_luma;
	unsigned int   segment_order_vert_contiguous_chroma;
	unsigned int req128_horz_wc_l;
	unsigned int req128_horz_wc_c;
	unsigned int req128_vert_wc_l;
	unsigned int req128_vert_wc_c;
	unsigned int MAS_vp_horz_limit;
	unsigned int MAS_vp_vert_limit;
	unsigned int max_vp_horz_width;
	unsigned int max_vp_vert_height;
	unsigned int eff_surf_width_l;
	unsigned int eff_surf_width_c;
	unsigned int eff_surf_height_l;
	unsigned int eff_surf_height_c;
	unsigned int full_swath_bytes_horz_wc_l;
	unsigned int full_swath_bytes_horz_wc_c;
	unsigned int full_swath_bytes_vert_wc_l;
	unsigned int full_swath_bytes_vert_wc_c;
	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;

	unsigned int   yuv420;
	unsigned int   horz_div_l;
	unsigned int   horz_div_c;
	unsigned int   vert_div_l;
	unsigned int   vert_div_c;

	unsigned int     swath_buf_size;
	double   detile_buf_vp_horz_limit;
	double   detile_buf_vp_vert_limit;

	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
			SourcePixelFormat == dm_420_12) ? 1 : 0);
	horz_div_l = 1;
	horz_div_c = 1;
	vert_div_l = 1;
	vert_div_c = 1;

	if (BytePerPixelY == 1)
		vert_div_l = 0;
	if (BytePerPixelC == 1)
		vert_div_c = 0;

	if (BytePerPixelC == 0) {
		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
				BytePerPixelY / (1 + horz_div_l));
		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
				(1 + vert_div_l));
	} else {
		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
				(1 + vert_div_c) / (1 + yuv420));
	}

	if (SourcePixelFormat == dm_420_10) {
		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
	}

	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);

	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);

	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
	if (BytePerPixelC > 0) {
		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
	} else {
		full_swath_bytes_horz_wc_c = 0;
		full_swath_bytes_vert_wc_c = 0;
	}

	if (SourcePixelFormat == dm_420_10) {
		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
	}

	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 0;
	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 1;
	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 0;
	} else {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 1;
	}

	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 0;
	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 1;
	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 0;
	} else {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 1;
	}

	if (BytePerPixelY == 2) {
		segment_order_horz_contiguous_luma = 0;
		segment_order_vert_contiguous_luma = 1;
	} else {
		segment_order_horz_contiguous_luma = 1;
		segment_order_vert_contiguous_luma = 0;
	}

	if (BytePerPixelC == 2) {
		segment_order_horz_contiguous_chroma = 0;
		segment_order_vert_contiguous_chroma = 1;
	} else {
		segment_order_horz_contiguous_chroma = 1;
		segment_order_vert_contiguous_chroma = 0;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
			__func__, segment_order_horz_contiguous_chroma);
#endif

	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
			RequestLuma = REQ_256Bytes;
		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
			RequestLuma = REQ_128BytesNonContiguous;
		else
			RequestLuma = REQ_128BytesContiguous;

		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
			RequestChroma = REQ_256Bytes;
		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
			RequestChroma = REQ_128BytesNonContiguous;
		else
			RequestChroma = REQ_128BytesContiguous;

	} else if (!IsVertical(SourceRotation)) {
		if (req128_horz_wc_l == 0)
			RequestLuma = REQ_256Bytes;
		else if (segment_order_horz_contiguous_luma == 0)
			RequestLuma = REQ_128BytesNonContiguous;
		else
			RequestLuma = REQ_128BytesContiguous;

		if (req128_horz_wc_c == 0)
			RequestChroma = REQ_256Bytes;
		else if (segment_order_horz_contiguous_chroma == 0)
			RequestChroma = REQ_128BytesNonContiguous;
		else
			RequestChroma = REQ_128BytesContiguous;

	} else {
		if (req128_vert_wc_l == 0)
			RequestLuma = REQ_256Bytes;
		else if (segment_order_vert_contiguous_luma == 0)
			RequestLuma = REQ_128BytesNonContiguous;
		else
			RequestLuma = REQ_128BytesContiguous;

		if (req128_vert_wc_c == 0)
			RequestChroma = REQ_256Bytes;
		else if (segment_order_vert_contiguous_chroma == 0)
			RequestChroma = REQ_128BytesNonContiguous;
		else
			RequestChroma = REQ_128BytesContiguous;
	}

	if (RequestLuma == REQ_256Bytes) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 256;
		*IndependentBlockLuma = 0;
	} else if (RequestLuma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 128;
		*IndependentBlockLuma = 128;
	} else {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 64;
		*IndependentBlockLuma = 64;
	}

	if (RequestChroma == REQ_256Bytes) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 256;
		*IndependentBlockChroma = 0;
	} else if (RequestChroma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 128;
		*IndependentBlockChroma = 128;
	} else {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 64;
		*IndependentBlockChroma = 64;
	}

	if (DCCEnabled != true || BytePerPixelC == 0) {
		*MaxUncompressedBlockChroma = 0;
		*MaxCompressedBlockChroma = 0;
		*IndependentBlockChroma = 0;
	}

	if (DCCEnabled != true) {
		*MaxUncompressedBlockLuma = 0;
		*MaxCompressedBlockLuma = 0;
		*IndependentBlockLuma = 0;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
#endif

} // CalculateDCCConfiguration

/*
 * dml32_CalculateStutterEfficiency - derive stutter period, burst time and
 * the resulting stutter efficiency figures (regular and Z8 variants).
 *
 * Processing order, as implemented below:
 *  1. Accumulate, over all non-phantom surfaces, the read bandwidth after DCC
 *     compression, the zero-size-request bandwidth and the row (meta + dpte)
 *     bandwidth, then derive the average compression rate and the average
 *     zero-size fraction.
 *  2. Derive EffectiveCompressedBufferSize from the compressed buffer, meta
 *     FIFO, ROB and zero-size buffer sizes; the formula depends on whether
 *     the zero-size fraction is exactly 1, strictly between 0 and 1, or 0.
 *  3. Pick the "critical" surface: the non-phantom surface with the smallest
 *     DETBufferingTimeY.  That time becomes *StutterPeriod.
 *  4. Compute StutterBurstTime for one stutter period.
 *  5. Efficiency without vblank is max(0, 1 - (exit + burst) / period) * 100;
 *     it is forced to 0 as soon as any WritebackEnable[] entry is set.
 *  6. Efficiency including vblank is only computed when the timing conditions
 *     checked near the end hold; otherwise it equals the value from step 5.
 *  7. *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE is cleared only for the specific
 *     single-surface configuration tested at the end of the function.
 *
 * All per-surface array parameters are indexed by k in
 * [0, NumberOfActiveSurfaces).
 *
 * Outputs (all written unconditionally):
 *   StutterEfficiencyNotIncludingVBlank, StutterEfficiency,
 *   NumberOfStutterBurstsPerFrame, Z8StutterEfficiencyNotIncludingVBlank,
 *   Z8StutterEfficiency, Z8NumberOfStutterBurstsPerFrame, StutterPeriod,
 *   DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE.
 */
void dml32_CalculateStutterEfficiency(
		unsigned int      CompressedBufferSizeInkByte,
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		bool   UnboundedRequestEnabled,
		unsigned int      MetaFIFOSizeInKEntries,
		unsigned int      ZeroSizeBufferEntries,
		unsigned int      PixelChunkSizeInKByte,
		unsigned int   NumberOfActiveSurfaces,
		unsigned int      ROBBufferSizeInKByte,
		double    TotalDataReadBandwidth,
		double    DCFCLK,
		double    ReturnBW,
		unsigned int      CompbufReservedSpace64B,
		unsigned int      CompbufReservedSpaceZs,
		double    SRExitTime,
		double    SRExitZ8Time,
		bool   SynchronizeTimingsFinal,
		unsigned int   BlendingAndTiming[],
		double    StutterEnterPlusExitWatermark,
		double    Z8StutterEnterPlusExitWatermark,
		bool   ProgressiveToInterlaceUnitInOPP,
		bool   Interlace[],
		double    MinTTUVBlank[],
		unsigned int   DPPPerSurface[],
		unsigned int      DETBufferSizeY[],
		unsigned int   BytePerPixelY[],
		double    BytePerPixelDETY[],
		double      SwathWidthY[],
		unsigned int   SwathHeightY[],
		unsigned int   SwathHeightC[],
		double    NetDCCRateLuma[],
		double    NetDCCRateChroma[],
		double    DCCFractionOfZeroSizeRequestsLuma[],
		double    DCCFractionOfZeroSizeRequestsChroma[],
		unsigned int      HTotal[],
		unsigned int      VTotal[],
		double    PixelClock[],
		double    VRatio[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int   BlockHeight256BytesY[],
		unsigned int   BlockWidth256BytesY[],
		unsigned int   BlockHeight256BytesC[],
		unsigned int   BlockWidth256BytesC[],
		unsigned int   DCCYMaxUncompressedBlock[],
		unsigned int   DCCCMaxUncompressedBlock[],
		unsigned int      VActive[],
		bool   DCCEnable[],
		bool   WritebackEnable[],
		double    ReadBandwidthSurfaceLuma[],
		double    ReadBandwidthSurfaceChroma[],
		double    meta_row_bw[],
		double    dpte_row_bw[],

		/* Output */
		double   *StutterEfficiencyNotIncludingVBlank,
		double   *StutterEfficiency,
		unsigned int     *NumberOfStutterBurstsPerFrame,
		double   *Z8StutterEfficiencyNotIncludingVBlank,
		double   *Z8StutterEfficiency,
		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
		double   *StutterPeriod,
		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
{

	bool FoundCriticalSurface = false;
	unsigned int SwathSizeCriticalSurface = 0;
	unsigned int LastChunkOfSwathSize;
	unsigned int MissingPartOfLastSwathOfDETSize;
	double LastZ8StutterPeriod = 0.0;
	double LastStutterPeriod = 0.0;
	unsigned int TotalNumberOfActiveOTG = 0;
	double doublePixelClock = 0;
	unsigned int doubleHTotal = 0;
	unsigned int doubleVTotal = 0;
	bool SameTiming = true;
	double DETBufferingTimeY;
	double SwathWidthYCriticalSurface = 0.0;
	double SwathHeightYCriticalSurface = 0.0;
	double VActiveTimeCriticalSurface = 0.0;
	double FrameTimeCriticalSurface = 0.0;
	unsigned int BytePerPixelYCriticalSurface = 0;
	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
	unsigned int DETBufferSizeYCriticalSurface = 0;
	double MinTTUVBlankCriticalSurface = 0.0;
	unsigned int BlockWidth256BytesYCriticalSurface = 0;
	bool doublePlaneCriticalSurface = 0;
	bool doublePipeCriticalSurface = 0;
	double TotalCompressedReadBandwidth;
	double TotalRowReadBandwidth;
	double AverageDCCCompressionRate;
	double EffectiveCompressedBufferSize;
	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
	double StutterBurstTime;
	unsigned int TotalActiveWriteback;
	double LinesInDETY;
	double LinesInDETYRoundedDownToSwath;
	/*
	 * The three values below are only assigned on some paths but are
	 * printed unconditionally by the debug trace; give them a defined
	 * value so a debug build never reads an indeterminate object.
	 */
	double MaximumEffectiveCompressionLuma = 0.0;
	double MaximumEffectiveCompressionChroma = 0.0;
	double TotalZeroSizeRequestReadBandwidth;
	double TotalZeroSizeCompressedReadBandwidth;
	double AverageDCCZeroSizeFraction;
	double AverageZeroSizeCompressionRate = 0.0;
	unsigned int k;

	TotalZeroSizeRequestReadBandwidth = 0;
	TotalZeroSizeCompressedReadBandwidth = 0;
	TotalRowReadBandwidth = 0;
	TotalCompressedReadBandwidth = 0;

	/* Step 1: accumulate compressed / zero-size / row bandwidth over non-phantom surfaces. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
			if (DCCEnable[k] == true) {
				/*
				 * Cap the luma compression ratio at 2 when the 256-byte block
				 * dimension along the scan direction exceeds the swath height,
				 * or when the max uncompressed block is below 256; else cap at 4.
				 */
				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
						|| (!IsVertical(SourceRotation[k])
								&& BlockHeight256BytesY[k] > SwathHeightY[k])
						|| DCCYMaxUncompressedBlock[k] < 256) {
					MaximumEffectiveCompressionLuma = 2;
				} else {
					MaximumEffectiveCompressionLuma = 4;
				}
				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
						+ ReadBandwidthSurfaceLuma[k]
								/ dml_min(NetDCCRateLuma[k],
										MaximumEffectiveCompressionLuma);
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
						__func__, k, ReadBandwidthSurfaceLuma[k]);
				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
						__func__, k, NetDCCRateLuma[k]);
				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
						__func__, k, MaximumEffectiveCompressionLuma);
#endif
				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
								/ MaximumEffectiveCompressionLuma;

				/* Same accounting for the chroma plane, only when it carries bandwidth. */
				if (ReadBandwidthSurfaceChroma[k] > 0) {
					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
							|| (!IsVertical(SourceRotation[k])
									&& BlockHeight256BytesC[k] > SwathHeightC[k])
							|| DCCCMaxUncompressedBlock[k] < 256) {
						MaximumEffectiveCompressionChroma = 2;
					} else {
						MaximumEffectiveCompressionChroma = 4;
					}
					TotalCompressedReadBandwidth =
							TotalCompressedReadBandwidth
							+ ReadBandwidthSurfaceChroma[k]
							/ dml_min(NetDCCRateChroma[k],
							MaximumEffectiveCompressionChroma);
#ifdef __DML_VBA_DEBUG__
					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
							__func__, k, ReadBandwidthSurfaceChroma[k]);
					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
							__func__, k, NetDCCRateChroma[k]);
					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
							__func__, k, MaximumEffectiveCompressionChroma);
#endif
					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
							+ ReadBandwidthSurfaceChroma[k]
									* DCCFractionOfZeroSizeRequestsChroma[k];
					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
							+ ReadBandwidthSurfaceChroma[k]
									* DCCFractionOfZeroSizeRequestsChroma[k]
									/ MaximumEffectiveCompressionChroma;
				}
			} else {
				/* DCC disabled: the surface contributes its raw read bandwidth. */
				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
			}
			TotalRowReadBandwidth = TotalRowReadBandwidth
					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
		}
	}

	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
			__func__, TotalZeroSizeCompressedReadBandwidth);
	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
#endif
	/* Step 2: effective buffer size; three cases keyed on the zero-size fraction. */
	if (AverageDCCZeroSizeFraction == 1) {
		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
				/ TotalZeroSizeCompressedReadBandwidth;
		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
				* AverageZeroSizeCompressionRate
				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
						* AverageZeroSizeCompressionRate;
	} else if (AverageDCCZeroSizeFraction > 0) {
		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
				/ TotalZeroSizeCompressedReadBandwidth;
		EffectiveCompressedBufferSize = dml_min(
				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
				(double) MetaFIFOSizeInKEntries * 1024 * 64
					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
					+ 1 / AverageDCCCompressionRate))
					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
					* AverageDCCCompressionRate,
					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: min 1 = %f\n", __func__,
				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
						AverageDCCCompressionRate));
		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
#endif
	} else {
		EffectiveCompressedBufferSize = dml_min(
				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
						* AverageDCCCompressionRate;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: min 1 = %f\n", __func__,
				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
		dml_print("DML::%s: min 2 = %f\n", __func__,
				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
#endif
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
#endif

	*StutterPeriod = 0;

	/* Step 3: the surface with the shortest DET buffering time is the critical one. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
			/*
			 * With unbounded requests the surface is also credited a share of
			 * the effective compressed buffer proportional to its luma bandwidth.
			 */
			LinesInDETY = ((double) DETBufferSizeY[k]
					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
					/ BytePerPixelDETY[k] / SwathWidthY[k];
			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
					/ VRatio[k];
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
			/* SwathWidthY[] is double, so it must be printed with %f. */
			dml_print("DML::%s: k=%0d, SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
					__func__, k, ReadBandwidthSurfaceLuma[k]);
			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
					__func__, k, LinesInDETYRoundedDownToSwath);
			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
#endif

			/* The first non-phantom surface is always taken; later ones only if strictly shorter. */
			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;

				FoundCriticalSurface = true;
				*StutterPeriod = DETBufferingTimeY;
				/* Interlaced timing uses half the line counts, floored to a whole line. */
				FrameTimeCriticalSurface = (
						isInterlaceTiming ?
								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
						* (double) HTotal[k] / PixelClock[k];
				VActiveTimeCriticalSurface = (
						isInterlaceTiming ?
								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
						* (double) HTotal[k] / PixelClock[k];
				BytePerPixelYCriticalSurface = BytePerPixelY[k];
				SwathWidthYCriticalSurface = SwathWidthY[k];
				SwathHeightYCriticalSurface = SwathHeightY[k];
				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
				/* Despite the "double" prefix these flag a surface with no chroma bandwidth / exactly one DPP. */
				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);

#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
						__func__, k, FoundCriticalSurface);
				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
						__func__, k, *StutterPeriod);
				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
						__func__, k, MinTTUVBlankCriticalSurface);
				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
						__func__, k, FrameTimeCriticalSurface);
				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
						__func__, k, VActiveTimeCriticalSurface);
				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
						__func__, k, BytePerPixelYCriticalSurface);
				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
						__func__, k, SwathWidthYCriticalSurface);
				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
						__func__, k, SwathHeightYCriticalSurface);
				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
						__func__, k, BlockWidth256BytesYCriticalSurface);
				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
						__func__, k, doublePlaneCriticalSurface);
				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
						__func__, k, doublePipeCriticalSurface);
				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
#endif
			}
		}
	}

	/* Step 4: split the data of one stutter period into the part that fits the buffers and the rest. */
	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
			EffectiveCompressedBufferSize);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
			__func__, *StutterPeriod * TotalDataReadBandwidth);
	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
#endif

	/*
	 * Burst time = buffered part (compressed) over ReturnBW
	 *            + remaining part over DCFCLK * 64
	 *            + row traffic over ReturnBW.
	 */
	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
			/ ReturnBW
			+ (*StutterPeriod * TotalDataReadBandwidth
					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
			AverageDCCCompressionRate / ReturnBW);
	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
			__func__, (*StutterPeriod * TotalDataReadBandwidth));
	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
#endif
	/* The burst can never be shorter than the time to finish the critical surface's residual swath. */
	StutterBurstTime = dml_max(StutterBurstTime,
			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
					* SwathWidthYCriticalSurface / ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Time to finish residue swath=%f\n",
			__func__,
			LinesToFinishSwathTransferStutterCriticalSurface *
			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
#endif

	/* Step 5: any enabled writeback (phantom surfaces are not excluded here) forces the efficiencies to 0. */
	TotalActiveWriteback = 0;
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (WritebackEnable[k])
			TotalActiveWriteback = TotalActiveWriteback + 1;
	}

	if (TotalActiveWriteback == 0) {
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
#endif
		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
		*NumberOfStutterBurstsPerFrame = (
				*StutterEfficiencyNotIncludingVBlank > 0 ?
						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
		*Z8NumberOfStutterBurstsPerFrame = (
				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
	} else {
		*StutterEfficiencyNotIncludingVBlank = 0.;
		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
		*NumberOfStutterBurstsPerFrame = 0;
		*Z8NumberOfStutterBurstsPerFrame = 0;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
			__func__, *StutterEfficiencyNotIncludingVBlank);
	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
#endif

	/*
	 * Count the non-phantom surfaces with BlendingAndTiming[k] == k and check
	 * that they all share the PixelClock / HTotal / VTotal of the first one.
	 */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
			if (BlendingAndTiming[k] == k) {
				if (TotalNumberOfActiveOTG == 0) {
					doublePixelClock = PixelClock[k];
					doubleHTotal = HTotal[k];
					doubleVTotal = VTotal[k];
				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
						|| doubleVTotal != VTotal[k]) {
					SameTiming = false;
				}
				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
			}
		}
	}

	/* Step 6: include vblank only when timings line up and the last period plus MinTTUVBlank beats the watermark. */
	if (*StutterEfficiencyNotIncludingVBlank > 0) {
		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;

		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
						+ StutterBurstTime * VActiveTimeCriticalSurface
						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
		} else {
			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
		}
	} else {
		*StutterEfficiency = 0;
	}

	/*
	 * NOTE(review): the Z8 branch uses *NumberOfStutterBurstsPerFrame rather
	 * than *Z8NumberOfStutterBurstsPerFrame; kept as-is, confirm this is the
	 * intended formula.
	 */
	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
		LastZ8StutterPeriod = VActiveTimeCriticalSurface
				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
		} else {
			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
		}
	} else {
		*Z8StutterEfficiency = 0.;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
			__func__, *StutterEfficiencyNotIncludingVBlank);
	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
#endif

	/* Step 7: swath size of the critical surface, its last partial chunk, and the DET shortfall to a whole swath. */
	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
			- DETBufferSizeYCriticalSurface;

	/* The flag is false only when every one of the conditions below holds. */
	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
#endif
} // CalculateStutterEfficiency

/*
 * Split the return buffer into a DET budget and a compressed-buffer remainder.
 *
 *  - *MaxTotalDETInKByte: 4/5 of (ConfigReturnBufferSizeInKByte +
 *    ROBBufferSizeInKByte), rounded up with dml_ceil(..., 64).
 *  - *nomDETInKByte: the DET budget divided by MaxNumDPP, rounded down with
 *    dml_floor(..., 64); replaced by nomDETInKByteOverrideValue when
 *    nomDETInKByteOverrideEnable is set.
 *  - *MinCompressedBufferSizeInKByte: ConfigReturnBufferSizeInKByte minus the
 *    DET budget (unsigned arithmetic).
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	double return_plus_rob_kbytes = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbytes * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Without an override the computed nominal size stands. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize

/*
 * Report whether the summed per-surface bandwidth fits within ReturnBW.
 *
 * Each surface k in [0, NumberOfActiveSurfaces) contributes
 *   ReadBandwidthLuma[k]   * UrgentBurstFactorLuma[k]
 * + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 * + cursor_bw[k]           * UrgentBurstFactorCursor[k]
 * + NumberOfDPP[k] * meta_row_bandwidth[k]
 * + NumberOfDPP[k] * dpte_row_bandwidth[k].
 *
 * Returns true only when that total is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool hiding_violated = false;
	bool supported;
	double total_bw = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			hiding_violated = true;

		/* Terms are added one at a time, in the same left-to-right order. */
		total_bw += ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
		total_bw += ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
		total_bw += cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		total_bw += NumberOfDPP[surf] * meta_row_bandwidth[surf];
		total_bw += NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = (total_bw <= ReturnBW) && !hiding_violated;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, hiding_violated);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}

/*
 * Evaluate whether prefetch bandwidth fits within ReturnBW.
 *
 * For every surface the largest of three candidates is added to
 * *MaxPrefetchBandwidth:
 *   - NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   - the surface's active bandwidth (luma + chroma + cursor, each scaled by
 *     its urgent burst factor, plus NumberOfDPP[k] * (meta + dpte row bw))
 *   - NumberOfDPP[k] * (prefetch luma + chroma, scaled by the "Pre" burst
 *     factors) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Outputs:
 *   *MaxPrefetchBandwidth      - sum described above
 *   *FractionOfUrgentBandwidth - *MaxPrefetchBandwidth / ReturnBW
 *   *PrefetchBandwidthSupport  - true when the sum is <= ReturnBW and no
 *       NotUrgentLatencyHiding[] entry is set; when MaxVRatioPre equals
 *       __DML_MAX_VRATIO_PRE__ the sum of PrefetchBW[k] * VRatio[k] must
 *       additionally be <= total active bandwidth * __DML_MAX_BW_RATIO_PRE__.
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],
		double PrefetchBW[],
		double VRatio[],
		double MaxVRatioPre,

		/* output */
		double  *MaxPrefetchBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int surf;
	bool hiding_violated = false;
	bool supported;
	double active_bw_sum = 0;
	double prefetch_bw_sum = 0;

	*MaxPrefetchBandwidth = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double surf_active_bw;

		if (NotUrgentLatencyHiding[surf])
			hiding_violated = true;

		surf_active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);

		active_bw_sum += surf_active_bw;
		prefetch_bw_sum += PrefetchBW[surf] * VRatio[surf];

		*MaxPrefetchBandwidth += dml_max3(
				NumberOfDPP[surf] * prefetch_vmrow_bw[surf],
				surf_active_bw,
				NumberOfDPP[surf] * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
						+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
						+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf]);
	}

	supported = (*MaxPrefetchBandwidth <= ReturnBW) && !hiding_violated;

	/* The prefetch-to-active ratio limit is only enforced at the maximum prefetch VRatio. */
	if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
		supported = supported && (prefetch_bw_sum <= active_bw_sum * __DML_MAX_BW_RATIO_PRE__);

	*PrefetchBandwidthSupport = supported;
	*FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
}

/*
 * Compute how much of ReturnBW remains for immediate flip.
 *
 * Starting from ReturnBW, every surface k in [0, NumberOfActiveSurfaces)
 * is charged dml_max() of two demands:
 *   - active:   luma, chroma and cursor read bandwidth, each multiplied by
 *               its urgent burst factor;
 *   - prefetch: NumberOfDPP[k] times the luma/chroma prefetch bandwidth
 *               (each multiplied by its "Pre" urgent burst factor), plus
 *               cursor_bw_pre[k] multiplied by its "Pre" burst factor.
 *
 * Returns ReturnBW minus the sum of those per-surface charges. The result
 * is not clamped, so it can be negative when the demand exceeds ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	double RemainingBandwidth = ReturnBW;
	unsigned int k;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Demand of surface k while it is being displayed. */
		double ActiveDemand = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
		/* Demand of surface k while it is being prefetched. */
		double PrefetchDemand = NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];

		RemainingBandwidth = RemainingBandwidth - dml_max(ActiveDemand, PrefetchDemand);
	}

	return RemainingBandwidth;
}

/*
 * Check whether the total bandwidth demand, with immediate flip taken into
 * account, fits within ReturnBW.
 *
 * Each surface k in [0, NumberOfActiveSurfaces) contributes dml_max3() of:
 *   - NumberOfDPP[k] * prefetch_vmrow_bw[k];
 *   - the active demand: luma, chroma and cursor read bandwidth scaled by
 *     their urgent burst factors, plus a per-DPP row term;
 *   - the prefetch demand: per-DPP luma/chroma prefetch bandwidth scaled by
 *     the "Pre" burst factors, plus the scaled cursor_bw_pre[k].
 *
 * When ImmediateFlipRequirement[k] is anything other than
 * dm_immediate_flip_not_required, the per-DPP row term is final_flip_bw[k]
 * and final_flip_bw[k] is also added per DPP to the prefetch demand.
 * Otherwise the row term is meta_row_bandwidth[k] + dpte_row_bandwidth[k]
 * and the prefetch demand carries no flip bandwidth.
 *
 * Outputs:
 *   *TotalBandwidth                - sum of the per-surface contributions
 *   *FractionOfUrgentBandwidth     - *TotalBandwidth / ReturnBW
 *   *ImmediateFlipBandwidthSupport - true when *TotalBandwidth <= ReturnBW
 */
void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		enum immediate_flip_requirement ImmediateFlipRequirement[],
		double final_flip_bw[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double  *TotalBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *ImmediateFlipBandwidthSupport)
{
	unsigned int k;

	*TotalBandwidth = 0;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		double VMRowDemand = NumberOfDPP[k] * prefetch_vmrow_bw[k];
		double ActiveDemand;
		double PrefetchDemand;

		if (ImmediateFlipRequirement[k] == dm_immediate_flip_not_required) {
			/* No flip on this surface: charge the regular meta/dpte row traffic. */
			ActiveDemand = NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
			PrefetchDemand = NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
		} else {
			/* Flip may happen: final_flip_bw replaces the row term and is added to prefetch. */
			ActiveDemand = NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
			PrefetchDemand = NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
		}

		*TotalBandwidth = *TotalBandwidth + dml_max3(VMRowDemand, ActiveDemand, PrefetchDemand);
	}

	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
}

/*
 * Report whether some non-phantom surface cannot hide, inside its DET, the
 * time needed to fetch one swath for every surface.
 *
 * The fetch time is
 *   (sum over surfaces of one luma + one chroma swath, times NumOfDPP)
 *       / ReturnBW + UrgentLatency
 * where a swath is SwathHeight * SwathWidth * BytePerPixelInDET and the
 * chroma swath counts as 0 when SwathHeightC[k] == 0.
 *
 * The hiding time of a plane is
 *   (dml_floor(DETBufferSize / BytePerPixelInDET / SwathWidth, 1.0)
 *       - SwathHeight) / VRatio * (HTotal / PixelClock)
 * and a surface uses dml_min() of its luma and chroma values when it has
 * chroma (SwathHeightC[k] != 0), otherwise the luma value alone.
 *
 * Surfaces whose UsesMALLForPStateChange[k] equals
 * dm_use_mall_pstate_change_phantom_pipe are skipped in the hiding check,
 * but still contribute to the summed swath size.
 *
 * Return: true when at least one checked surface has a hiding time below
 * the fetch time; false otherwise. Always false when UseUnboundedRequesting
 * is dm_unbounded_requesting.
 */
bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double UrgentLatency,
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		unsigned int SwathWidthY[],
		unsigned int SwathWidthC[],
		double  BytePerPixelInDETY[],
		double  BytePerPixelInDETC[],
		unsigned int    DETBufferSizeY[],
		unsigned int    DETBufferSizeC[],
		unsigned int	NumOfDPP[],
		unsigned int	HTotal[],
		double	PixelClock[],
		double	VRatioY[],
		double	VRatioC[],
		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
		enum unbounded_requesting_policy UseUnboundedRequesting)
{
	/* Unsigned to match NumberOfActiveSurfaces and the sibling helpers. */
	unsigned int k;
	double SwathSizeAllSurfaces = 0;
	double SwathSizeAllSurfacesInFetchTimeUs;

	if (UseUnboundedRequesting == dm_unbounded_requesting)
		return false;

	/*
	 * calculate sum of single swath size for all pipes in bytes
	 *
	 * Per-surface sizes are only needed for the running sum, so they live
	 * in loop-local scalars instead of fixed-size stack arrays.
	 */
	for (k = 0; k < NumberOfActiveSurfaces; k++) {
		double SwathSizeY = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
		double SwathSizeC = 0;

		if (SwathHeightC[k] != 0)
			SwathSizeC = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];

		SwathSizeAllSurfaces += SwathSizeY + SwathSizeC;
	}

	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;

	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
	for (k = 0; k < NumberOfActiveSurfaces; k++) {
		double LineTime;
		double DETSwathLatencyHidingUs;

		/* only care if surface is not phantom */
		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
			continue;

		LineTime = HTotal[k] / PixelClock[k];

		/* Start from the luma plane; narrowed below when chroma is present. */
		DETSwathLatencyHidingUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;

		if (SwathHeightC[k] != 0) {
			double DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;

			DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingUs, DETSwathLatencyHidingCUs);
		}

		/* DET must be able to hide time to fetch 1 swath for each surface */
		if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs)
			return true;
	}

	return false;
}