godot/thirdparty/meshoptimizer/quantization.cpp

// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>

union FloatBits
{
	float f;
	unsigned int ui;
};

unsigned short meshopt_quantizeHalf(float v)
{
	FloatBits u = {v};
	unsigned int ui = u.ui;

	int s = (ui >> 16) & 0x8000;
	int em = ui & 0x7fffffff;

	// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
	int h = (em - (112 << 23) + (1 << 12)) >> 13;

	// underflow: flush to zero; 113 encodes exponent -14
	h = (em < (113 << 23)) ? 0 : h;

	// overflow: infinity; 143 encodes exponent 16
	h = (em >= (143 << 23)) ? 0x7c00 : h;

	// NaN; note that we convert all types of NaN to qNaN
	h = (em > (255 << 23)) ? 0x7e00 : h;

	return (unsigned short)(s | h);
}

float meshopt_quantizeFloat(float v, int N)
{
	assert(N >= 0 && N <= 23);

	FloatBits u = {v};
	unsigned int ui = u.ui;

	const int mask = (1 << (23 - N)) - 1;
	const int round = (1 << (23 - N)) >> 1;

	int e = ui & 0x7f800000;
	unsigned int rui = (ui + round) & ~mask;

	// round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0
	ui = e == 0x7f800000 ? ui : rui;

	// flush denormals to zero
	ui = e == 0 ? 0 : ui;

	u.ui = ui;
	return u.f;
}

float meshopt_dequantizeHalf(unsigned short h)
{
	unsigned int s = unsigned(h & 0x8000) << 16;
	int em = h & 0x7fff;

	// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
	int r = (em + (112 << 10)) << 13;

	// denormal: flush to zero
	r = (em < (1 << 10)) ? 0 : r;

	// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
	// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
	r += (em >= (31 << 10)) ? (112 << 23) : 0;

	FloatBits u;
	u.ui = s | r;
	return u.f;
}