
// basisu_transcoder.cpp
// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//    http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

#include "basisu_transcoder.h"
#include <limits.h>
#include "basisu_containers_impl.h"

// TODO: This doesn't work on OSX. How can this be so difficult?
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
//	#define BASISD_IS_BIG_ENDIAN (1)

	#ifdef __EMSCRIPTEN__
		// Can't use unaligned loads/stores with WebAssembly.
	#elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)

// Using unaligned loads and stores causes errors when using UBSan. Jam it off.
#if defined(__has_feature)
#if __has_feature(undefined_behavior_sanitizer)


	#error Must have defined BASISD_SUPPORT_KTX2

#error Must have defined BASISD_SUPPORT_KTX2_ZSTD

// Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data.



// Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript)

// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory than BC1.



// Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files.


// Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support.

// Support for ETC2 EAC R11 and ETC2 EAC RG11

// If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large.
// This impacts grayscale and grayscale+alpha textures the most.
	#ifdef __EMSCRIPTEN__
		// Let's assume size matters more than quality when compiling with emscripten.
		// Compiling native, so an extra 64K lookup table is probably acceptable.




		#error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1



// If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header.
   // If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded.
		// We only use two Zstd API's: ZSTD_decompress() and ZSTD_isError()
		#include <zstd.h>

// Debug-logging globals and entry points exported from namespace basisu.
// NOTE(review): the function bodies are not visible in this view.
namespace basisu
	// Process-wide debug-output flag. Presumably set by enable_debug_printf() and read by debug_printf() - confirm.
	bool g_debug_printf;

	// Takes the desired on/off state for debug output.
	void enable_debug_printf(bool enabled)

	// printf-style variadic logging entry point: a format string followed by its arguments.
	void debug_printf(const char* pFmt, ...)
} // namespace basisu

namespace basist

	// File-local debug flag bitmask; starts with no flags set.
	// Other code in this file tests it against cDebugFlagVisBC1Sels and cDebugFlagVisBC1Endpoints.
	static uint32_t g_debug_flags = 0;

	// Accessor for the debug flags. NOTE(review): body not visible here; presumably returns g_debug_flags - confirm.
	uint32_t get_debug_flags()

	// Mutator for the debug flags. NOTE(review): body not visible here; presumably stores f into g_debug_flags - confirm.
	void set_debug_flags(uint32_t f)

	// 16-bit byte-swap helper. NOTE(review): body not visible in this view; presumably exchanges the high and low bytes of v - confirm.
	inline uint16_t byteswap_uint16(uint16_t v)

	// Clamps an integer to the inclusive range [low, high].
	// The lower bound is tested first, so if low > high the result is low for any value below low.
	static inline int32_t clampi(int32_t value, int32_t low, int32_t high)
	{
		if (value < low)
			return low;
		return (value > high) ? high : value;
	}

	// Clamps a float to the inclusive range [low, high].
	// A NaN input compares false against both bounds and is returned unchanged.
	static inline float clampf(float value, float low, float high)
	{
		if (value < low)
			return low;
		return (value > high) ? high : value;
	}

	// Clamps a float to [0, 1].
	static inline float saturate(float value)
	{
		return clampf(value, 0.0f, 1.0f);
	}

	// Multiplies two 8-bit normalized values (each expected in [0, 255]) and returns the
	// product rescaled back to 8 bits, i.e. (v * q) / 255 rounded to nearest.
	static inline uint8_t mul_8(uint32_t v, uint32_t q)
	{
		// Add the rounding bias, then fold the high byte back in: this is the usual
		// shift-only replacement for a division by 255.
		const uint32_t t = v * q + 128;
		return static_cast<uint8_t>((t + (t >> 8)) >> 8);
	}

	// CRC-16 routine taking a byte range (r, size) and an incoming CRC value.
	// NOTE(review): body not visible in this view; presumably `crc` seeds the computation so calls can be chained - confirm.
	uint16_t crc16(const void* r, size_t size, uint16_t crc)
	// ETC-related constants. NOTE(review): enumerators not visible in this view; presumably where cETC1SelectorValues comes from - confirm.
	enum etc_constants


	// ETC1 intensity modifier tables, instantiated by DECLARE_ETC1_INTEN_TABLE (macro defined outside this view).
	// NOTE(review): the second argument looks like a scale applied to the modifiers (1x, 16x, 48x) - confirm against the macro.
	DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1);
	DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16);
	DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16);

	//const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
	// Selector remap table with cETC1SelectorValues entries (initializer not visible in this view).
	const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] =;
	// 32-entry lookup indexed by a 5-bit color component; later code in this file uses the result as an 8-bit base color.
	static const uint8_t g_etc_5_to_8[32] =;

	// ETC block type used by the decoder. Code in this file calls its static helpers get_diff_subblock_colors()
	// and pack_color5(), and reads m_differential.m_red1/m_green1/m_blue1/m_cw1. (Definition not visible in this view.)
	struct decoder_etc_block

	// DXT-related constants. NOTE(review): enumerators not visible in this view.
	enum dxt_constants

	// 4 x 256 byte lookup table (initializer not visible in this view).
	static const uint8_t g_etc1_x_selector_unpack[4][256] =;

	// DXT1 (BC1) block type. Code in this file uses get_low_color(), get_high_color(), get_selector(),
	// set_selector(), pack_unscaled_color() and the m_selectors[] bytes. (Definition not visible in this view.)
	struct dxt1_block

	// Inclusive selector range; code in this file reads its m_low and m_high members.
	struct dxt_selector_range

	// One table entry of the ETC1 -> DXT1 endpoint tables below.
	// NOTE(review): presumably {lo, hi, err}, matching the "{%u,%u,%u}" entries the table generators emit - confirm.
	struct etc1_to_dxt1_56_solution

	// Selector ranges iterated by the ETC1 -> DXT1 table generators (initializer not visible in this view).
	static dxt_selector_range g_etc1_to_dxt1_selector_ranges[] =;

	// Number of entries in g_etc1_to_dxt1_selector_ranges (initializer not visible in this view).
	const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_RANGES =;

	// 4 x 4 index table. NOTE(review): presumably maps a (low, high) selector pair to a range index and is filled at init - confirm.
	static uint32_t g_etc1_to_dxt1_selector_range_index[4][4];

	// Per-mapping translation of the 4 ETC1 selector values; the generators use entry [m][s] to index the 4 DXT1 colors.
	static const uint8_t g_etc1_to_dxt1_selector_mappings[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][4] =;
	// Per-mapping 256-entry byte tables. NOTE(review): not initialized here; presumably built at init time - confirm.
	static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
	static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];

	// Precomputed solutions for 6-bit DXT1 endpoints: 32 base values x 8 intensity tables x mappings x ranges.
	static const etc1_to_dxt1_56_solution g_etc1_to_dxt_6[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] =;

	// Precomputed solutions for 5-bit DXT1 endpoints; same dimensions as g_etc1_to_dxt_6.
	static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] =;

	// First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt.
	// Entry type of the single-color match tables below (members not visible in this view).
	struct bc1_match_entry
	// 256-entry tables, one pair (5-bit and 6-bit endpoints) per selector value.
	static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; // selector 1, allow equals hi/lo
	static bc1_match_entry g_bc1_match5_equals_0[256], g_bc1_match6_equals_0[256]; // selector 0, allow equals hi/lo

	// Fills one single-color match table. NOTE(review): body not visible in this view; presumably pExpand maps
	// quantized endpoint values to 8 bits, size0/size1 are the endpoint value counts, and sel is the selector - confirm.
	static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size0, int size1, int sel)

	static void create_etc1_to_dxt1_5_conversion_table()
		FILE* pFile = nullptr;
		fopen_s(&pFile, "basisu_transcoder_tables_dxt1_5.inc", "w");

		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 31; hi++)
							for (uint32_t lo = 0; lo <= 31; lo++)
								//if (lo == hi) continue;

								uint32_t colors[4];

								colors[0] = (lo << 3) | (lo >> 2);
								colors[3] = (hi << 3) | (hi >> 2);

								colors[1] = (colors[0] * 2 + colors[3]) / 3;
								colors[2] = (colors[3] * 2 + colors[0]) / 3;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];

									total_err += err * err;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						assert(best_err <= 0xFFFF);

						//table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
						//table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
						//table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);

						//assert(best_lo != best_hi);
						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten


	static void create_etc1_to_dxt1_6_conversion_table()
		FILE* pFile = nullptr;
		fopen_s(&pFile, "basisu_transcoder_tables_dxt1_6.inc", "w");

		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 63; hi++)
							for (uint32_t lo = 0; lo <= 63; lo++)
								//if (lo == hi) continue;

								uint32_t colors[4];

								colors[0] = (lo << 2) | (lo >> 4);
								colors[3] = (hi << 2) | (hi >> 4);

								colors[1] = (colors[0] * 2 + colors[3]) / 3;
								colors[2] = (colors[3] * 2 + colors[0]) / 3;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];

									total_err += err * err;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						assert(best_err <= 0xFFFF);

						//table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
						//table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
						//table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);

						//assert(best_lo != best_hi);
						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");

					} // m
				} // sr
			} // g
		} // inten


	// EAC modifier tables: 16 tables of 8 signed modifiers each, indexed [table][selector] by the packers in this file.
	static const int8_t g_eac_modifier_table[16][8] =;

	// Used by ETC2 EAC A8 and ETC2 EAC R11/RG11.
	struct eac_block


	// Selector ranges iterated by the ETC2 EAC table generators (initializer not visible in this view).
	static const dxt_selector_range s_etc2_eac_selector_ranges[] =;

	// Number of entries in s_etc2_eac_selector_ranges (initializer not visible in this view).
	const uint32_t NUM_ETC2_EAC_SELECTOR_RANGES =;

	// Conversion table entry; code in this file writes its m_base, m_table_mul and m_trans members.
	struct etc1_g_to_eac_conversion


	// Output of pack_eac_a8_exhaustive(): the chosen EAC A8 parameters plus selector storage.
	// Scalars default to 0 so a default-constructed object never holds indeterminate values.
	struct pack_eac_a8_results
	{
		uint32_t m_base = 0;        // chosen base value
		uint32_t m_table = 0;       // chosen modifier table index
		uint32_t m_multiplier = 0;  // chosen multiplier
		basisu::vector<uint8_t> m_selectors;       // selectors, one byte per pixel; read by the table generator as the result
		basisu::vector<uint8_t> m_selectors_temp;  // per-pixel scratch written while a candidate is being evaluated
	};

	// Exhaustively searches every EAC A8 base (0-255), multiplier (1-15) and modifier table (0-15)
	// for the combination that best reproduces the given 8-bit pixel values.
	//
	// results    - receives the best base/table/multiplier and, in m_selectors, one selector (0-7) per pixel.
	// pPixels    - the 8-bit values to approximate.
	// num_pixels - number of entries in pPixels.
	//
	// Returns the summed squared error of the best combination.
	static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels)
	{
		// Both selector arrays are indexed per pixel below, so size them up front.
		results.m_selectors.resize(num_pixels);
		results.m_selectors_temp.resize(num_pixels);

		uint64_t best_err = UINT64_MAX;

		for (uint32_t base_color = 0; base_color < 256; base_color++)
		{
			for (uint32_t multiplier = 1; multiplier < 16; multiplier++)
			{
				for (uint32_t table = 0; table < 16; table++)
				{
					uint64_t total_err = 0;

					for (uint32_t i = 0; i < num_pixels; i++)
					{
						const int a = pPixels[i];

						// Pick the selector whose reconstructed value is closest to this pixel.
						uint32_t best_s_err = UINT32_MAX;
						uint32_t best_s = 0;
						for (uint32_t s = 0; s < 8; s++)
						{
							int v = (int)multiplier * g_eac_modifier_table[table][s] + (int)base_color;
							if (v < 0)
								v = 0;
							else if (v > 255)
								v = 255;

							uint32_t err = abs(a - v);
							if (err < best_s_err)
							{
								best_s_err = err;
								best_s = s;
							}
						}

						results.m_selectors_temp[i] = static_cast<uint8_t>(best_s);

						total_err += best_s_err * best_s_err;

						// Early out: this candidate can no longer beat the current best.
						if (total_err >= best_err)
							break;
					}

					if (total_err < best_err)
					{
						best_err = total_err;
						results.m_base = base_color;
						results.m_multiplier = multiplier;
						results.m_table = table;

						// Commit this candidate's selectors; the old best becomes the new scratch buffer.
						results.m_selectors.swap(results.m_selectors_temp);
					}

				} // table

			} // multiplier

		} // base_color

		return best_err;
	}
		// ETC1 grayscale -> ETC2 EAC A8 conversion table, indexed [base + inten * 32][selector range]
		// (see the generator that follows, which writes entries through this indexing). Initializer not visible in this view.
		etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =;

	static void create_etc2_eac_a8_conversion_table()
		FILE* pFile = fopen("basisu_decoder_tables_etc2_eac_a8.inc", "w");

		for (uint32_t inten = 0; inten < 8; inten++)
			for (uint32_t base = 0; base < 32; base++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);

				fprintf(pFile, "{");

				for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
					const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
					const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;

					// We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
					// Now find the best ETC2 EAC A8 base/table/multiplier that fits these colors.

					uint8_t pixels[4];
					uint32_t num_pixels = 0;
					for (uint32_t s = low_selector; s <= high_selector; s++)
						pixels[num_pixels++] = block_colors[s].g;

					pack_eac_a8_results pack_results;
					pack_eac_a8_exhaustive(pack_results, pixels, num_pixels);

					etc1_g_to_eac_conversion& c = s_etc1_g_to_etc2_a8[base + inten * 32][sel_range];

					c.m_base = pack_results.m_base;
					c.m_table_mul = pack_results.m_table * 16 + pack_results.m_multiplier;
					c.m_trans = 0;

					for (uint32_t s = 0; s < 4; s++)
						if ((s < low_selector) || (s > high_selector))

						uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];

						c.m_trans |= (etc2_selector << (s * 3));

					fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
					if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
						fprintf(pFile, ",");

				fprintf(pFile, "},\n");


	// Output of pack_eac_r11_exhaustive(): the chosen EAC R11 parameters plus selector storage.
	// Scalars default to 0 so a default-constructed object never holds indeterminate values.
	struct pack_eac_r11_results
	{
		uint32_t m_base = 0;        // chosen base value
		uint32_t m_table = 0;       // chosen modifier table index
		uint32_t m_multiplier = 0;  // chosen multiplier
		basisu::vector<uint8_t> m_selectors;       // selectors, one byte per pixel; read by the table generator as the result
		basisu::vector<uint8_t> m_selectors_temp;  // per-pixel scratch written while a candidate is being evaluated
	};

	// Exhaustively searches every EAC R11 base (0-255), multiplier (0-15) and modifier table (0-15)
	// for the combination that best reproduces the given pixels, compared at 11-bit precision.
	//
	// results    - receives the best base/table/multiplier and, in m_selectors, one selector (0-7) per pixel.
	// pPixels    - the 8-bit values to approximate (each is rescaled to 0-2047 before comparison).
	// num_pixels - number of entries in pPixels.
	//
	// Returns the summed squared 11-bit error of the best combination.
	static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels)
	{
		// Both selector arrays are indexed per pixel below, so size them up front.
		results.m_selectors.resize(num_pixels);
		results.m_selectors_temp.resize(num_pixels);

		uint64_t best_err = UINT64_MAX;

		for (uint32_t base_color = 0; base_color < 256; base_color++)
		{
			for (uint32_t multiplier = 0; multiplier < 16; multiplier++)
			{
				for (uint32_t table = 0; table < 16; table++)
				{
					uint64_t total_err = 0;

					for (uint32_t i = 0; i < num_pixels; i++)
					{
						// Convert 8-bit input to 11-bits
						const int a = (pPixels[i] * 2047 + 128) / 255;

						// Pick the selector whose reconstructed value is closest to this pixel.
						uint32_t best_s_err = UINT32_MAX;
						uint32_t best_s = 0;
						for (uint32_t s = 0; s < 8; s++)
						{
							// A multiplier of 0 scales the modifier by 1 instead of multiplier * 8.
							int v = (int)(multiplier ? (multiplier * 8) : 1) * g_eac_modifier_table[table][s] + (int)base_color * 8 + 4;
							if (v < 0)
								v = 0;
							else if (v > 2047)
								v = 2047;

							uint32_t err = abs(a - v);
							if (err < best_s_err)
							{
								best_s_err = err;
								best_s = s;
							}
						}

						results.m_selectors_temp[i] = static_cast<uint8_t>(best_s);

						total_err += best_s_err * best_s_err;

						// Early out: this candidate can no longer beat the current best.
						if (total_err >= best_err)
							break;
					}

					if (total_err < best_err)
					{
						best_err = total_err;
						results.m_base = base_color;
						results.m_multiplier = multiplier;
						results.m_table = table;

						// Commit this candidate's selectors; the old best becomes the new scratch buffer.
						results.m_selectors.swap(results.m_selectors_temp);
					}

				} // table

			} // multiplier

		} // base_color

		return best_err;
	}

	static void create_etc2_eac_r11_conversion_table()
		FILE* pFile = nullptr;
		fopen_s(&pFile, "basisu_decoder_tables_etc2_eac_r11.inc", "w");

		for (uint32_t inten = 0; inten < 8; inten++)
			for (uint32_t base = 0; base < 32; base++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);

				fprintf(pFile, "{");

				for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
					const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
					const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;

					// We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
					// Now find the best ETC2 EAC R11 base/table/multiplier that fits these colors.

					uint8_t pixels[4];
					uint32_t num_pixels = 0;
					for (uint32_t s = low_selector; s <= high_selector; s++)
						pixels[num_pixels++] = block_colors[s].g;

					pack_eac_r11_results pack_results;
					pack_eac_r11_exhaustive(pack_results, pixels, num_pixels);

					etc1_g_to_eac_conversion c;

					c.m_base = (uint8_t)pack_results.m_base;
					c.m_table_mul = (uint8_t)(pack_results.m_table * 16 + pack_results.m_multiplier);
					c.m_trans = 0;

					for (uint32_t s = 0; s < 4; s++)
						if ((s < low_selector) || (s > high_selector))

						uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];

						c.m_trans |= (etc2_selector << (s * 3));

					fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
					if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
						fprintf(pFile, ",");

				fprintf(pFile, "},\n");


	// Forward declarations of per-format table builders and init routines defined later in this file.
	// NOTE(review): any preprocessor guards around these declarations are not visible in this view.

	// ASTC
	static void create_etc1_to_astc_conversion_table_0_47();
	static void create_etc1_to_astc_conversion_table_0_255();

	static void transcoder_init_astc();

	// BC7 mode 5
	static void create_etc1_to_bc7_m5_color_conversion_table();
	static void create_etc1_to_bc7_m5_alpha_conversion_table();

	static void transcoder_init_bc7_mode5();

	// ATC
	static void create_etc1s_to_atc_conversion_tables();

	static void transcoder_init_atc();

	// PVRTC2
	static void transcoder_init_pvrtc2();

	// UASTC (external linkage, unlike the static declarations above)
	void uastc_init();

	// File-local flag. NOTE(review): presumably set by basisu_transcoder_init() so repeated calls are cheap - body not visible, confirm.
	static bool g_transcoder_initialized;
	// Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz.
	// If this is too slow, these computed tables can easily be moved to be compiled in.
	void basisu_transcoder_init()

	// Converts one ETC1S block (endpoint + selector) into the DXT1 block at pDst_block.
	// NOTE(review): body not visible in this view; use_threecolor_blocks presumably permits DXT1 3-color mode - confirm.
	static void convert_etc1s_to_dxt1(dxt1_block* pDst_block, const endpoint *pEndpoints, const selector* pSelector, bool use_threecolor_blocks)

	// Debug wrapper around convert_etc1s_to_dxt1(): performs the normal conversion, then optionally
	// alters the result for visualization depending on g_debug_flags.
	static void convert_etc1s_to_dxt1_vis(dxt1_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
		convert_etc1s_to_dxt1(pDst_block, pEndpoints, pSelector, use_threecolor_blocks);

		if (g_debug_flags & cDebugFlagVisBC1Sels)
			// Selector visualization: packs the maximum (31, 63, 31) and minimum (0, 0, 0) unscaled colors.
			// NOTE(review): l and h are not used in the lines visible here; presumably they are stored as the block's endpoints - confirm.
			uint32_t l = dxt1_block::pack_unscaled_color(31, 63, 31);
			uint32_t h = dxt1_block::pack_unscaled_color(0, 0, 0);
		else if (g_debug_flags & cDebugFlagVisBC1Endpoints)
			// Endpoint visualization: the top two rows get selector 0, the bottom two rows get selector 1.
			for (uint32_t y = 0; y < 4; y++)
				for (uint32_t x = 0; x < 4; x++)
					pDst_block->set_selector(x, y, (y < 2) ? 0 : 1);

	// Bit-level layout of a 128-bit FXT1 block as two 64-bit words.
	// NOTE(review): the union/struct framing is not visible in this view. Presumably m_lo, m_lo_bits and m_sels alias the
	// low word, and m_hi and m_hi_bits alias the high word - confirm.
	struct fxt1_block
				// Low word: 32 two-bit texel selectors (32 x 2 = 64 bits).
				uint64_t m_t00 : 2;
				uint64_t m_t01 : 2;
				uint64_t m_t02 : 2;
				uint64_t m_t03 : 2;
				uint64_t m_t04 : 2;
				uint64_t m_t05 : 2;
				uint64_t m_t06 : 2;
				uint64_t m_t07 : 2;
				uint64_t m_t08 : 2;
				uint64_t m_t09 : 2;
				uint64_t m_t10 : 2;
				uint64_t m_t11 : 2;
				uint64_t m_t12 : 2;
				uint64_t m_t13 : 2;
				uint64_t m_t14 : 2;
				uint64_t m_t15 : 2;
				uint64_t m_t16 : 2;
				uint64_t m_t17 : 2;
				uint64_t m_t18 : 2;
				uint64_t m_t19 : 2;
				uint64_t m_t20 : 2;
				uint64_t m_t21 : 2;
				uint64_t m_t22 : 2;
				uint64_t m_t23 : 2;
				uint64_t m_t24 : 2;
				uint64_t m_t25 : 2;
				uint64_t m_t26 : 2;
				uint64_t m_t27 : 2;
				uint64_t m_t28 : 2;
				uint64_t m_t29 : 2;
				uint64_t m_t30 : 2;
				uint64_t m_t31 : 2;
			} m_lo;
			// Whole-word and per-byte views of the selectors; the converter in this file writes m_sels[0..7].
			uint64_t m_lo_bits;
			uint8_t m_sels[8];
				// High word: four 5:5:5 colors (12 x 5 = 60 bits) followed by 4 control bits.
				// NOTE(review): two field orderings are listed back to back (1,0,3,2 then 0,1,2,3); presumably a
				// preprocessor conditional not visible in this view selects one of them - confirm.
				uint64_t m_b1 : 5;
				uint64_t m_g1 : 5;
				uint64_t m_r1 : 5;
				uint64_t m_b0 : 5;
				uint64_t m_g0 : 5;
				uint64_t m_r0 : 5;
				uint64_t m_b3 : 5;
				uint64_t m_g3 : 5;
				uint64_t m_r3 : 5;
				uint64_t m_b2 : 5;
				uint64_t m_g2 : 5;
				uint64_t m_r2 : 5;
				uint64_t m_b0 : 5;
				uint64_t m_g0 : 5;
				uint64_t m_r0 : 5;
				uint64_t m_b1 : 5;
				uint64_t m_g1 : 5;
				uint64_t m_r1 : 5;
				uint64_t m_b2 : 5;
				uint64_t m_g2 : 5;
				uint64_t m_r2 : 5;
				uint64_t m_b3 : 5;
				uint64_t m_g3 : 5;
				uint64_t m_r3 : 5;
				// Control bits: 1-bit alpha flag, 2 green LSBs (one per 4x4 half), 1-bit mode.
				uint64_t m_alpha : 1;
				uint64_t m_glsb : 2;
				uint64_t m_mode : 1;
			} m_hi;
			uint64_t m_hi_bits;

	// Remaps one byte of four packed 2-bit DXT1 selectors to the FXT1 selector ordering.
	// Each 2-bit selector is translated as 0->0, 1->3, 2->1, 3->2; the table handles two selectors (one nibble) at a time.
	// Only the low 8 bits of sels are used.
	static uint8_t conv_dxt1_to_fxt1_sels(uint32_t sels)
	{
		static const uint8_t s_conv_table[16] = { 0, 3, 1, 2, 12, 15, 13, 14, 4, 7, 5, 6, 8, 11, 9, 10 };

		// Mask both nibbles so an input above 255 cannot index past the end of the table.
		const uint32_t lo_nibble = sels & 15;
		const uint32_t hi_nibble = (sels >> 4) & 15;
		return static_cast<uint8_t>(s_conv_table[lo_nibble] | (s_conv_table[hi_nibble] << 4));
	}

	static void convert_etc1s_to_fxt1(void *pDst, const endpoint *pEndpoints, const selector *pSelectors, uint32_t fxt1_subblock)
		fxt1_block* pBlock = static_cast<fxt1_block*>(pDst);

		// CC_MIXED is basically DXT1 with different encoding tricks.
		// So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. 
		// (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.)
		dxt1_block blk;
		convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false);

		const uint32_t l = blk.get_low_color();
		const uint32_t h = blk.get_high_color();

		color32 color0((l >> 11) & 31, (l >> 5) & 63, l & 31, 255);
		color32 color1((h >> 11) & 31, (h >> 5) & 63, h & 31, 255);

		uint32_t g0 = color0.g & 1;
		uint32_t g1 = color1.g & 1;
		color0.g >>= 1;
		color1.g >>= 1;

		blk.m_selectors[0] = conv_dxt1_to_fxt1_sels(blk.m_selectors[0]);
		blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]);
		blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]);
		blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]);
		if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1))
			std::swap(color0, color1);
			std::swap(g0, g1);

			blk.m_selectors[0] ^= 0xFF;
			blk.m_selectors[1] ^= 0xFF;
			blk.m_selectors[2] ^= 0xFF;
			blk.m_selectors[3] ^= 0xFF;

		if (fxt1_subblock == 0)
			pBlock->m_hi.m_mode = 1; 
			pBlock->m_hi.m_alpha = 0;
			pBlock->m_hi.m_glsb = g1 | (g1 << 1);
			pBlock->m_hi.m_r0 = color0.r;
			pBlock->m_hi.m_g0 = color0.g;
			pBlock->m_hi.m_b0 = color0.b;
			pBlock->m_hi.m_r1 = color1.r;
			pBlock->m_hi.m_g1 = color1.g;
			pBlock->m_hi.m_b1 = color1.b;
			pBlock->m_hi.m_r2 = color0.r;
			pBlock->m_hi.m_g2 = color0.g;
			pBlock->m_hi.m_b2 = color0.b;
			pBlock->m_hi.m_r3 = color1.r;
			pBlock->m_hi.m_g3 = color1.g;
			pBlock->m_hi.m_b3 = color1.b;
			pBlock->m_sels[0] = blk.m_selectors[0];
			pBlock->m_sels[1] = blk.m_selectors[1];
			pBlock->m_sels[2] = blk.m_selectors[2];
			pBlock->m_sels[3] = blk.m_selectors[3];

			static const uint8_t s_border_dup[4] = { 0, 85, 170, 255 };
			pBlock->m_sels[4] = s_border_dup[blk.m_selectors[0] >> 6];
			pBlock->m_sels[5] = s_border_dup[blk.m_selectors[1] >> 6];
			pBlock->m_sels[6] = s_border_dup[blk.m_selectors[2] >> 6];
			pBlock->m_sels[7] = s_border_dup[blk.m_selectors[3] >> 6];
			pBlock->m_hi.m_glsb = (pBlock->m_hi.m_glsb & 1) | (g1 << 1);
			pBlock->m_hi.m_r2 = color0.r;
			pBlock->m_hi.m_g2 = color0.g;
			pBlock->m_hi.m_b2 = color0.b;
			pBlock->m_hi.m_r3 = color1.r;
			pBlock->m_hi.m_g3 = color1.g;
			pBlock->m_hi.m_b3 = color1.b;
			pBlock->m_sels[4] = blk.m_selectors[0];
			pBlock->m_sels[5] = blk.m_selectors[1];
			pBlock->m_sels[6] = blk.m_selectors[2];
			pBlock->m_sels[7] = blk.m_selectors[3];
	// Selector ranges used for the ETC1 -> DXT5A conversion table (initializer not visible in this view).
	static dxt_selector_range s_dxt5a_selector_ranges[] =;

	// Number of entries in s_dxt5a_selector_ranges (initializer not visible in this view).
	const uint32_t NUM_DXT5A_SELECTOR_RANGES =;

	// Entry type of g_etc1_g_to_dxt5a (members not visible in this view).
	struct etc1_g_to_dxt5a_conversion

	// Conversion table with 32 * 8 rows and one column per DXT5A selector range.
	// NOTE(review): rows are presumably indexed base + inten * 32, like the EAC A8 table in this file - confirm.
	static etc1_g_to_dxt5a_conversion g_etc1_g_to_dxt5a[32 * 8][NUM_DXT5A_SELECTOR_RANGES] =;

	// DXT5A (alpha) block type (definition not visible in this view).
	struct dxt5a_block

	// Converts one ETC1S block (endpoint + selector) into the DXT5A block at pDst_block. Body not visible in this view.
	static void convert_etc1s_to_dxt5a(dxt5a_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)

	// 256-entry table indexed by one byte of a block coordinate; the fixup functions in this file OR together
	// shifted lookups of x and y to form the swizzled block address.
	// NOTE(review): initializer not visible; presumably spreads the 8 input bits to every other bit position - confirm.
	static const  uint16_t g_pvrtc_swizzle_table[256] =;

	// Note we can't use simple calculations to convert PVRTC1 encoded endpoint components to/from 8-bits, due to hardware approximations.
	// Expansion tables, one entry per encoded value (5-bit, 4-bit, 3-bit components and a 9-entry alpha table).
	static const uint8_t g_pvrtc_5[32] =;
	static const uint8_t g_pvrtc_4[16] =;
	static const uint8_t g_pvrtc_3[8] =;
	static const uint8_t g_pvrtc_alpha[9] =;
	// 256-entry tables in floor/ceil pairs for each precision.
	// NOTE(review): presumably map an 8-bit value to the nearest encodable value at or below (floor) / at or above (ceil) - confirm.
	static const uint8_t g_pvrtc_5_floor[256] =;

	static const uint8_t g_pvrtc_5_ceil[256] =;
	static const uint8_t g_pvrtc_4_floor[256] =;

	static const uint8_t g_pvrtc_4_ceil[256] =;
	static const uint8_t g_pvrtc_3_floor[256] =;

	static const uint8_t g_pvrtc_3_ceil[256] =;
	static const uint8_t g_pvrtc_alpha_floor[256] =;

	static const uint8_t g_pvrtc_alpha_ceil[256] =;

	// PVRTC1 4bpp block type; the fixup functions in this file write its m_endpoints and m_modulation members.
	struct pvrtc4_block

// Disabled reference copy of the bilinear weights, one row of four weights per texel of a 4x4 block.
// Row index is y * 4 + x; the same values are passed as literals to the DO_PIX invocations in the fixup functions.
// Each row sums to 16.
#if 0
	static const uint8_t g_pvrtc_bilinear_weights[16][4] =
		{ 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 },
		{ 2, 2, 6, 6 }, { 1, 3, 3, 9 }, { 4, 0, 12, 0 }, { 3, 1, 9, 3 },
		{ 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 },
		{ 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 },

	// Temporary per-block state used during PVRTC1 transcoding (members not visible in this view).
	struct pvrtc1_temp_block

	// Helpers that take a packed 32-bit PVRTC endpoint word. Bodies are not visible in this view.
	// NOTE(review): "l0"/"l1" presumably mean a luma-like scalar for endpoint 0 / endpoint 1 of an opaque block; the RGB
	// fixup rescales the result with * 255 / 31, which suggests a 0-31 range - confirm.
	static inline uint32_t get_opaque_endpoint_l0(uint32_t endpoints)

	static inline uint32_t get_opaque_endpoint_l1(uint32_t endpoints)

	// Decodes endpoint `endpoint_index` (0 or 1) of the packed word to a color32. NOTE(review): presumably 8 bits per channel - confirm.
	static color32 get_endpoint_8888(uint32_t endpoints, uint32_t endpoint_index)

	// Scalar counterpart of get_endpoint_8888(); the RGBA fixup uses its result directly as an 8-bit-scale value.
	static uint32_t get_endpoint_l8(uint32_t endpoints, uint32_t endpoint_index)

	// TODO: Support decoding a non-pow2 ETC1S texture into the next larger pow2 PVRTC texture.
	//
	// PVRTC1 4bpp (opaque) pass that writes the final blocks: for every block it stores the precomputed endpoint
	// word and a modulation word into the swizzled destination slot.
	//
	// pETC_Blocks      - source ETC blocks in raster order (num_blocks_x * num_blocks_y entries).
	// pPVRTC_endpoints - one packed endpoint word per block, raster order.
	// pDst_blocks      - destination array of pvrtc4_block, addressed by swizzled index.
	// num_blocks_x/y   - block dimensions; the wrap masks below assume both are powers of two.
	//
	// NOTE(review): the DO_ROW / DO_LOOKUP / DO_PIX macro bodies and several braces/else lines are not visible in this view.
	static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
		// Wrap-around masks for neighbor lookups, and the bit counts needed for the swizzle.
		const uint32_t x_mask = num_blocks_x - 1;
		const uint32_t y_mask = num_blocks_y - 1;
		const uint32_t x_bits = basisu::total_bits(x_mask);
		const uint32_t y_bits = basisu::total_bits(y_mask);
		const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
		//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
		// Covers the interleaved x/y bits shared by both dimensions.
		const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;

		uint32_t block_index = 0;

		// really 3x3
		// Sliding window of scalar endpoint values (e0 = endpoint 0, e1 = endpoint 1), indexed [column][row],
		// covering the current block and its neighbors.
		int e0[4][4], e1[4][4];

		for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
			const uint32_t* pE_rows[3];

			// Prime the window for the start of this row: rows y-1..y+1 (wrapped), columns -1..1 (wrapped).
			for (int ey = 0; ey < 3; ey++)
				int by = y + ey - 1; 

				const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];

				pE_rows[ey] = pE;

				for (int ex = 0; ex < 3; ex++)
					int bx = 0 + ex - 1; 

					const uint32_t e = pE[bx & x_mask];

					// Rescale the helper's result by 255 / 31.
					e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
					e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;

			// y contributes the swizzle table output unshifted; x (below) contributes it shifted left by one.
			const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];

			for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
				const decoder_etc_block& src_block = pETC_Blocks[block_index];

				const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);

				uint32_t swizzled = x_swizzle | y_swizzle;
				// Non-square textures: keep only the shared interleaved bits, then append the larger dimension's extra bits.
				if (num_blocks_x != num_blocks_y)
					swizzled &= swizzle_mask;

					// NOTE(review): the two lines after this test look like the if / else arms (x wider vs y taller); the else is not visible here.
					if (num_blocks_x > num_blocks_y)
						swizzled |= ((x >> min_bits) << (min_bits * 2));
						swizzled |= ((y >> min_bits) << (min_bits * 2));

				pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
				pDst_block->m_endpoints = pPVRTC_endpoints[block_index];

				uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
				uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
				uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];

				// Summed R+G+B of each of the block's 4 colors, scaled by 16 (the table carries the matching 3 * 16 scale).
				// NOTE(review): the table index order here (2, 3, 1, 0) and the lack of clamping differ from the RGBA
				// variant, which uses 0..3 with clamping - confirm this is intentional.
				const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
				int by = (base_r + base_g + base_b) * 16;
				int block_colors_y_x16[4];
				block_colors_y_x16[0] = by + pInten_table48[2];
				block_colors_y_x16[1] = by + pInten_table48[3];
				block_colors_y_x16[2] = by + pInten_table48[1];
				block_colors_y_x16[3] = by + pInten_table48[0];

					// Column to the right of the current block (wrapped); presumably loaded into window column 2 by DO_ROW.
					const uint32_t ex = 2;
					int bx = x + ex - 1;
					bx &= x_mask;

#define DO_ROW

#undef DO_ROW

				// Modulation word for this block, stored to the destination after the DO_PIX steps.
				uint32_t mod = 0;

				uint32_t lookup_x[4];

#define DO_LOOKUP

#undef DO_LOOKUP

#define DO_PIX

					// The four groups below handle the four 2x2 quadrants of the block. Each takes the 2x2 window corner
					// at (ex, ey) as a0..a3 / b0..b3; the last four DO_PIX arguments are that texel's bilinear weights.
					const uint32_t ex = 0, ey = 0;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(0, 0, 4, 4, 4, 4);
					DO_PIX(1, 0, 2, 6, 2, 6);
					DO_PIX(0, 1, 2, 2, 6, 6);
					DO_PIX(1, 1, 1, 3, 3, 9);

					const uint32_t ex = 1, ey = 0;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(2, 0, 8, 0, 8, 0);
					DO_PIX(3, 0, 6, 2, 6, 2);
					DO_PIX(2, 1, 4, 0, 12, 0);
					DO_PIX(3, 1, 3, 1, 9, 3);

					const uint32_t ex = 0, ey = 1;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(0, 2, 8, 8, 0, 0);
					DO_PIX(1, 2, 4, 12, 0, 0);
					DO_PIX(0, 3, 6, 6, 2, 2);
					DO_PIX(1, 3, 3, 9, 1, 3);

					const uint32_t ex = 1, ey = 1;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(2, 2, 16, 0, 0, 0);
					DO_PIX(3, 2, 12, 4, 0, 0);
					DO_PIX(2, 3, 12, 0, 4, 0);
					DO_PIX(3, 3, 9, 3, 3, 1);
#undef DO_PIX

				pDst_block->m_modulation = mod;

				// Slide the window one column left, ready for the next x.
				e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
				e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
				e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];

				e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
				e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
				e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];

			} // x
		} // y

	// PVRTC1 4bpp (with alpha) pass that writes the final blocks: for every block it stores the precomputed
	// endpoint word and a modulation word into the swizzled destination slot.
	//
	// pETC_Blocks      - source color ETC blocks in raster order.
	// pPVRTC_endpoints - one packed endpoint word per block, raster order.
	// pDst_blocks      - destination array of pvrtc4_block, addressed by swizzled index.
	// num_blocks_x/y   - block dimensions; the wrap masks below assume both are powers of two.
	// pAlpha_blocks    - one 32-bit record per block in raster order, read as two uint16 values:
	//                    [0] indexes pEndpoints, [1] indexes pSelectors.
	// pEndpoints       - endpoint palette used for the alpha data.
	// pSelectors       - selector palette used for the alpha data.
	//
	// NOTE(review): the DO_ROW / DO_PIX macro bodies and several braces/else lines are not visible in this view.
	static void fixup_pvrtc1_4_modulation_rgba(
		const decoder_etc_block* pETC_Blocks, 
		const uint32_t* pPVRTC_endpoints, 
		void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks,
		const endpoint* pEndpoints, const selector* pSelectors)
		// Wrap-around masks for neighbor lookups, and the bit counts needed for the swizzle.
		const uint32_t x_mask = num_blocks_x - 1;
		const uint32_t y_mask = num_blocks_y - 1;
		const uint32_t x_bits = basisu::total_bits(x_mask);
		const uint32_t y_bits = basisu::total_bits(y_mask);
		const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
		//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
		// Covers the interleaved x/y bits shared by both dimensions.
		const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;

		uint32_t block_index = 0;

		// really 3x3
		// Sliding window of scalar endpoint values (e0 = endpoint 0, e1 = endpoint 1), indexed [column][row],
		// covering the current block and its neighbors.
		int e0[4][4], e1[4][4];

		for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
			const uint32_t* pE_rows[3];

			// Prime the window for the start of this row: rows y-1..y+1 (wrapped), columns -1..1 (wrapped).
			for (int ey = 0; ey < 3; ey++)
				int by = y + ey - 1; 

				const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];

				pE_rows[ey] = pE;

				for (int ex = 0; ex < 3; ex++)
					int bx = 0 + ex - 1; 

					const uint32_t e = pE[bx & x_mask];

					e0[ex][ey] = get_endpoint_l8(e, 0);
					e1[ex][ey] = get_endpoint_l8(e, 1);

			// y contributes the swizzle table output unshifted; x (below) contributes it shifted left by one.
			const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];

			for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
				const decoder_etc_block& src_block = pETC_Blocks[block_index];
				// Look up this block's alpha endpoint and selector through its two 16-bit palette indices.
				const uint16_t* pSrc_alpha_block = reinterpret_cast<const uint16_t*>(static_cast<const uint32_t*>(pAlpha_blocks) + x + (y * num_blocks_x));
				const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]];
				const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]];
				const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
				uint32_t swizzled = x_swizzle | y_swizzle;
				// Non-square textures: keep only the shared interleaved bits, then append the larger dimension's extra bits.
				if (num_blocks_x != num_blocks_y)
					swizzled &= swizzle_mask;

					// NOTE(review): the two lines after this test look like the if / else arms (x wider vs y taller); the else is not visible here.
					if (num_blocks_x > num_blocks_y)
						swizzled |= ((x >> min_bits) << (min_bits * 2));
						swizzled |= ((y >> min_bits) << (min_bits * 2));

				pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
				pDst_block->m_endpoints = pPVRTC_endpoints[block_index];

				uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
				uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
				uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];

				// Summed R+G+B of each of the block's 4 colors, scaled by 16 and clamped to [0, 48 * 255].
				const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
				int by = (base_r + base_g + base_b) * 16;
				int block_colors_y_x16[4];
				block_colors_y_x16[0] = basisu::clamp<int>(by + pInten_table48[0], 0, 48 * 255);
				block_colors_y_x16[1] = basisu::clamp<int>(by + pInten_table48[1], 0, 48 * 255);
				block_colors_y_x16[2] = basisu::clamp<int>(by + pInten_table48[2], 0, 48 * 255);
				block_colors_y_x16[3] = basisu::clamp<int>(by + pInten_table48[3], 0, 48 * 255);

				// The 4 alpha values of this block (taken from the alpha endpoint's green channel), scaled by 16 and clamped to [0, 16 * 255].
				uint32_t alpha_base_g = g_etc_5_to_8[pAlpha_endpoints->m_color5.g] * 16;
				const int* pInten_table16 = g_etc1_inten_tables16[pAlpha_endpoints->m_inten5];
				int alpha_block_colors_x16[4];
				alpha_block_colors_x16[0] = basisu::clamp<int>(alpha_base_g + pInten_table16[0], 0, 16 * 255);
				alpha_block_colors_x16[1] = basisu::clamp<int>(alpha_base_g + pInten_table16[1], 0, 16 * 255);
				alpha_block_colors_x16[2] = basisu::clamp<int>(alpha_base_g + pInten_table16[2], 0, 16 * 255);
				alpha_block_colors_x16[3] = basisu::clamp<int>(alpha_base_g + pInten_table16[3], 0, 16 * 255);

				// clamp((base_r + base_g + base_b) * 16 + color_inten[s] * 48) + clamp(alpha_base_g * 16 + alpha_inten[as] * 16)

					// Column to the right of the current block (wrapped); presumably loaded into window column 2 by DO_ROW.
					const uint32_t ex = 2;
					int bx = x + ex - 1;
					bx &= x_mask;

#define DO_ROW

#undef DO_ROW

				// Modulation word for this block, stored to the destination after the DO_PIX steps.
				uint32_t mod = 0;

#define DO_PIX

					// The four groups below handle the four 2x2 quadrants of the block. Each takes the 2x2 window corner
					// at (ex, ey) as a0..a3 / b0..b3; the last four DO_PIX arguments are that texel's bilinear weights.
					const uint32_t ex = 0, ey = 0;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(0, 0, 4, 4, 4, 4);
					DO_PIX(1, 0, 2, 6, 2, 6);
					DO_PIX(0, 1, 2, 2, 6, 6);
					DO_PIX(1, 1, 1, 3, 3, 9);

					const uint32_t ex = 1, ey = 0;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(2, 0, 8, 0, 8, 0);
					DO_PIX(3, 0, 6, 2, 6, 2);
					DO_PIX(2, 1, 4, 0, 12, 0);
					DO_PIX(3, 1, 3, 1, 9, 3);

					const uint32_t ex = 0, ey = 1;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(0, 2, 8, 8, 0, 0);
					DO_PIX(1, 2, 4, 12, 0, 0);
					DO_PIX(0, 3, 6, 6, 2, 2);
					DO_PIX(1, 3, 3, 9, 1, 3);

					const uint32_t ex = 1, ey = 1;
					const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
					const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
					DO_PIX(2, 2, 16, 0, 0, 0);
					DO_PIX(3, 2, 12, 4, 0, 0);
					DO_PIX(2, 3, 12, 0, 4, 0);
					DO_PIX(3, 3, 9, 3, 3, 1);
#undef DO_PIX

				pDst_block->m_modulation = mod;

				// Slide the window one column left, ready for the next x.
				e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
				e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
				e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];

				e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
				e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
				e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];

			} // x
		} // y

	// ETC1S -> BC7 mode 5 lookup data.
	// NOTE(review): every initializer and body in this run is elided in the visible text.

	// Selector ranges for the colour tables; the generator below reads each entry's m_low / m_high.
	static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] =;

	const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES =;

	// Indexed [4][4]; presumably maps (low selector, high selector) to a range index - confirm in transcoder_init_bc7_mode5().
	static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4];
	const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS =;
	// One row per mapping; the generator indexes it as [m][s] to pick which of the 4 interpolated levels selector s uses.
	static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] =;

	struct etc1_to_bc7_m5_solution
	// Sized 32 * 8 * mappings * ranges, which matches the g / inten / sr / m loops of create_etc1_to_bc7_m5_color_conversion_table().
	static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] =;
	// Selector ranges for the alpha tables; read by create_etc1_to_bc7_m5_alpha_conversion_table().
	static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] =;

	const uint32_t NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES =;

	static uint32_t g_etc1_to_bc7_m5a_selector_range_index[4][4];

	struct etc1_g_to_bc7_m5a_conversion

	// Sized 8 * 32 * ranges, which matches the inten / g / sr loops of the alpha generator.
	static etc1_g_to_bc7_m5a_conversion g_etc1_g_to_bc7_m5a[8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES] =;
	// NOTE(review): body not visible; by its signature it takes a byte buffer, a value, a bit count and a bit offset.
	static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs)

	struct bc7_mode_5
	// Offline generator for "basisu_transcoder_tables_bc7_m5_color.inc".
	// For every ETC1 intensity table (0-7), 5-bit grey base level g (0-31), selector range and selector mapping,
	// it exhaustively tries all pairs of 7-bit endpoints (lo, hi) and prints the pair whose four interpolated
	// levels best match the green channel of the decoded ETC1 subblock colours, as "{lo,hi,err},".
	// NOTE(review): in the visible text the file is never closed and `n` is never modified after
	// initialization, so the line-break test below can never fire - confirm against the full source.
	static void create_etc1_to_bc7_m5_color_conversion_table()
		FILE* pFile = nullptr;
		// NOTE(review): the outcome of fopen_s is not checked; pFile stays nullptr if the open fails.
		fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_color.inc", "w");

		// Entry counter consulted by the line-break test after each fprintf.
		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				// Decode the four subblock colours for a grey base colour (g, g, g) with this intensity table.
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						// Brute-force search over all 128 x 128 endpoint pairs.
						for (uint32_t hi = 0; hi <= 127; hi++)
							for (uint32_t lo = 0; lo <= 127; lo++)
								uint32_t colors[4];

								// Expand the 7-bit endpoints to 8 bits by replicating the top bit into the LSB.
								colors[0] = (lo << 1) | (lo >> 6);
								colors[3] = (hi << 1) | (hi >> 6);

								// Interior levels: weights 21/64 and 43/64, rounded.
								colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
								colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;

								uint64_t total_err = 0;

								// Accumulate squared error over only the selectors inside [low_selector, high_selector].
								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;

								// Strict '<' keeps the first (lowest hi, then lowest lo) pair on ties.
								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						// Clamp the reported error to 16 bits.
						// NOTE(review): best_err is uint64_t but the template argument is uint32_t (the ASTC generators in this
						// file use <uint64_t>); presumably best_err is narrowed before the comparison - verify.
						best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						// Intended to break the output line every 32 entries.
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten


	// Offline generator for "basisu_transcoder_tables_bc7_m5_alpha.inc".
	// For every ETC1 intensity table (0-7), 5-bit grey base level g (0-31) and selector range, it tries all
	// 256 x 256 pairs of 8-bit endpoints (lo, hi). For each pair every in-range ETC1 selector is matched to the
	// closest of the four interpolated levels, and the pair with the lowest summed error is printed as
	// "{lo,hi,selectors}", where `selectors` holds the chosen level for selector s in bits [2s, 2s+1].
	// NOTE(review): in the visible text the file is never closed and `n` is never modified after
	// initialization, so the line-break test below can never fire - confirm against the full source.
	static void create_etc1_to_bc7_m5_alpha_conversion_table()
		FILE* pFile = nullptr;
		// NOTE(review): the outcome of fopen_s is not checked; pFile stays nullptr if the open fails.
		fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_alpha.inc", "w");

		// Entry counter consulted by the line-break test after each fprintf.
		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				// Decode the four subblock colours for a grey base colour (g, g, g) with this intensity table.
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_high;

					uint32_t best_lo = 0;
					uint32_t best_hi = 0;
					uint64_t best_err = UINT64_MAX;
					uint32_t best_output_selectors = 0;

					for (uint32_t hi = 0; hi <= 255; hi++)
						for (uint32_t lo = 0; lo <= 255; lo++)
							uint32_t colors[4];

							// Endpoints are used directly as 8-bit values.
							colors[0] = lo;
							colors[3] = hi;

							// Interior levels: weights 21/64 and 43/64, rounded.
							colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
							colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;

							uint64_t total_err = 0;
							uint32_t output_selectors = 0;

							for (uint32_t s = low_selector; s <= high_selector; s++)
								// Find the interpolated level k closest to ETC1 selector s.
								int best_mapping_err = INT_MAX;
								int best_k = 0;
								for (int k = 0; k < 4; k++)
									int mapping_err = block_colors[s].g - colors[k];
									mapping_err *= mapping_err;

									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										mapping_err *= 5;

									if (mapping_err < best_mapping_err)
										best_mapping_err = mapping_err;
										best_k = k;
								} // k
								total_err += best_mapping_err;
								// Two bits per ETC1 selector, selector 0 in the low bits.
								output_selectors |= (best_k << (s * 2));
							} // s

							// Strict '<' keeps the first (lowest hi, then lowest lo) pair on ties.
							if (total_err < best_err)
								best_err = total_err;
								best_lo = lo;
								best_hi = hi;
								best_output_selectors = output_selectors;

						} // lo
					} // hi
					fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors);
					// Intended to break the output line every 32 entries.
					if ((n & 31) == 31)
						fprintf(pFile, "\n");

				} // sr
			} // g
		} // inten


	// Declarations for the BC7 mode 5, ETC2 EAC and ASTC conversion paths.
	// NOTE(review): struct bodies, function bodies and initializers in this run are elided in the visible text.
	struct bc7_m5_match_entry

	// 256 entries, i.e. one per 8-bit value; presumably the best single-colour match when selector 1 is used - confirm.
	static bc7_m5_match_entry g_bc7_m5_equals_1[256] =;
	static void transcoder_init_bc7_mode5()

	// Converts one ETC1S block (endpoint + selector) into the BC7 mode 5 colour data at pDst; body not visible.
	static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector)

	// Alpha counterpart of convert_etc1s_to_bc7_m5_color(); body not visible.
	static void convert_etc1s_to_bc7_m5_alpha(void* pDst, const endpoint* pEndpoints, const selector* pSelector)

	static const uint8_t g_etc2_eac_a8_sel4[6] =;

	static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)

	// Indexed by [32 * 8] (base level x intensity table, order not visible here) and by selector range.
	static const etc1_g_to_eac_conversion s_etc1_g_to_etc2_r11[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =;

	static void convert_etc1s_to_etc2_eac_r11(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)

	struct etc1_to_astc_solution

	// Selector ranges for the ASTC tables; the ASTC generators below read each entry's m_low / m_high.
	static dxt_selector_range g_etc1_to_astc_selector_ranges[] =;


	static uint32_t g_etc1_to_astc_selector_range_index[4][4];

	// One row per mapping; the ASTC generators index it as [m][s].
	static const uint8_t g_etc1_to_astc_selector_mappings[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS][4] =;

	// Precomputed solutions for endpoints in the [0,47] range.
	static const etc1_to_astc_solution g_etc1_to_astc[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] =;

	// The best selector mapping to use given a base+inten table and used selector range for converting grayscale data.
	static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
	// Same layout as g_etc1_to_astc, for endpoints in the full [0,255] range.
	static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] =;
	static uint8_t g_etc1_to_astc_best_grayscale_mapping_0_255[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];

	// Maps a quantized endpoint index (0-47) to the value used for interpolation; read by create_etc1_to_astc_conversion_table_0_47().
	static uint32_t g_ise_to_unquant[48];

	// Offline generator for "basisu_transcoder_tables_astc.inc".
	// For every ETC1 intensity table (0-7), 5-bit grey base level g (0-31), selector range and selector mapping,
	// it tries all 48 x 48 pairs of quantized endpoint indices (unquantized through g_ise_to_unquant) and prints
	// the pair with the lowest weighted squared error against the decoded ETC1 green channel as "{lo,hi,err},".
	// NOTE(review): in the visible text the file is never closed and `n` is never modified after
	// initialization, so the line-break test below can never fire - confirm against the full source.
	static void create_etc1_to_astc_conversion_table_0_47()
		FILE* pFile = nullptr;
		// NOTE(review): the outcome of fopen_s is not checked; pFile stays nullptr if the open fails.
		fopen_s(&pFile, "basisu_transcoder_tables_astc.inc", "w");

		// Entry counter consulted by the line-break test after each fprintf.
		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				// Decode the four subblock colours for a grey base colour (g, g, g) with this intensity table.
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;

					// Best result per selector mapping, printed after all mappings have been searched.
					uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
					uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
					uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
					// Tracked across mappings but not read again in the visible code.
					uint64_t highest_best_err = 0;

					for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 47; hi++)
							for (uint32_t lo = 0; lo <= 47; lo++)
								uint32_t colors[4];

								// Build the four interpolated levels for this endpoint pair.
								for (uint32_t s = 0; s < 4; s++)
									// Replicate the 2-bit weight into 6 bits.
									uint32_t s_scaled = s | (s << 2) | (s << 4);
									// NOTE(review): the statement controlled by this test is not visible here; confirm against the full source.
									if (s_scaled > 32)

									// Widen the unquantized endpoints to 16 bits, interpolate with rounding, keep the top 8 bits.
									uint32_t c0 = g_ise_to_unquant[lo] | (g_ise_to_unquant[lo] << 8);
									uint32_t c1 = g_ise_to_unquant[hi] | (g_ise_to_unquant[hi] << 8);
									colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;

								uint64_t total_err = 0;

								// Accumulate squared error over only the selectors inside [low_selector, high_selector].
								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 8;

									total_err += (err * err) * err_scale;

								// Strict '<' keeps the first (lowest hi, then lowest lo) pair on ties.
								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						mapping_best_low[m] = best_lo;
						mapping_best_high[m] = best_hi;
						mapping_best_err[m] = best_err;
						highest_best_err = basisu::maximum(highest_best_err, best_err);
					} // m

					// Emit one "{lo,hi,err}" triple per mapping, with the error clamped to 16 bits.
					for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
						uint64_t err = mapping_best_err[m];

						err = basisu::minimum<uint64_t>(err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);

						// Intended to break the output line every 32 entries.
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m

				} // sr
			} // g
		} // inten


	// Offline generator for "basisu_transcoder_tables_astc_0_255.inc".
	// Same search as create_etc1_to_astc_conversion_table_0_47(), except that the endpoints are raw 8-bit
	// values (all 256 x 256 pairs) rather than quantized indices looked up in g_ise_to_unquant.
	// NOTE(review): in the visible text the file is never closed and `n` is never modified after
	// initialization, so the line-break test below can never fire - confirm against the full source.
	static void create_etc1_to_astc_conversion_table_0_255()
		FILE* pFile = nullptr;
		// NOTE(review): the outcome of fopen_s is not checked; pFile stays nullptr if the open fails.
		fopen_s(&pFile, "basisu_transcoder_tables_astc_0_255.inc", "w");

		// Entry counter consulted by the line-break test after each fprintf.
		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				// Decode the four subblock colours for a grey base colour (g, g, g) with this intensity table.
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;

					// Best result per selector mapping, printed after all mappings have been searched.
					uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
					uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
					uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
					// Tracked across mappings but not read again in the visible code.
					uint64_t highest_best_err = 0;

					for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 255; hi++)
							for (uint32_t lo = 0; lo <= 255; lo++)
								uint32_t colors[4];

								// Build the four interpolated levels for this endpoint pair.
								for (uint32_t s = 0; s < 4; s++)
									// Replicate the 2-bit weight into 6 bits.
									uint32_t s_scaled = s | (s << 2) | (s << 4);
									// NOTE(review): the statement controlled by this test is not visible here; confirm against the full source.
									if (s_scaled > 32)

									// Widen the 8-bit endpoints to 16 bits, interpolate with rounding, keep the top 8 bits.
									uint32_t c0 = lo | (lo << 8);
									uint32_t c1 = hi | (hi << 8);
									colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;

								uint64_t total_err = 0;

								// Accumulate squared error over only the selectors inside [low_selector, high_selector].
								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];

									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
									// the low/high selectors which are clamping to either 0 or 255.
									int err_scale = 1;
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 8;

									total_err += (err * err) * err_scale;

								// Strict '<' keeps the first (lowest hi, then lowest lo) pair on ties.
								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						mapping_best_low[m] = best_lo;
						mapping_best_high[m] = best_hi;
						mapping_best_err[m] = best_err;
						highest_best_err = basisu::maximum(highest_best_err, best_err);
					} // m

					// Emit one "{lo,hi,err}" triple per mapping, with the error clamped to 16 bits.
					for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
						uint64_t err = mapping_best_err[m];

						err = basisu::minimum<uint64_t>(err, 0xFFFF);
						fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
						// Intended to break the output line every 32 entries.
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m

				} // sr
			} // g
		} // inten



	// NOTE(review): the initializers and function bodies in this run are elided in the visible text.

	// Table encodes 5 trits to 8 output bits. 3^5 entries.
	// Inverse of the trit bit manipulation process in https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
	static const uint8_t g_astc_trit_encode[243] =;

	// Extracts bits [low,high]
	static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)

	// Writes bits to output in an endian safe way
	// bit_pos is taken by reference; presumably advanced by total_bits - confirm in the body.
	static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits)

	// Encodes 5 values to output, usable for any range that uses trits and bits
	static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)

	// Input to the astc_pack_block_* functions below; fields not visible here.
	struct astc_block_params
	// Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2).
	// We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity.
	// Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color.
	// 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47.
	// Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec:
	// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
	// 32 total weights, stored as 16 CA CA, each ranging from 0-3.
	static void astc_pack_block_cem_12_weight_range2(uint32_t *pOutput, const astc_block_params* pBlock)

	// CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights
	// This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient.
	static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock)

	// Optional 8-bit endpoint packing functions.

	// CEM mode 4 (LDR Luminance+Alpha Direct), 8-bit endpoints, 2 bit weights
	static void astc_pack_block_cem_4_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)

	// CEM mode 8 (LDR RGB Direct), 8-bit endpoints, 2 bit weights
	static void astc_pack_block_cem_8_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)

	// Optimal quantized [0,47] entry to use given [0,255] input
	static uint8_t g_astc_single_color_encoding_0[256];

	// Optimal quantized [0,47] low/high values given [0,255] input assuming a selector of 1
	static struct
	{} g_astc_single_color_encoding_1[256];
	// Presumably fills the single-colour tables above and the ASTC range-index tables - body not visible, confirm.
	static void transcoder_init_astc()

	// Converts opaque or color+alpha ETC1S block to ASTC 4x4.
	// This function tries to use the best ASTC mode given the block's actual contents.
	// transcode_alpha selects between the two cases; the codebook pointers' use is not visible here.
	static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, 
		bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook)

	// ATC and PVRTC2 both use these tables.
	// One precomputed table entry: the low/high endpoint pair and the error it achieves.
	// This matches the "{lo,hi,err}" triples that the table generators in this file print.
	// NOTE(review): the struct/initializer delimiters and the closers for the `#if 0` below are not visible in this excerpt.
	struct etc1s_to_atc_solution
		uint8_t m_lo;
		uint8_t m_hi;
		uint16_t m_err;

	// The (low selector, high selector) ranges the tables are specialised for.
	// The position in this array is the range index stored in g_etc1s_to_atc_selector_range_index.
	static dxt_selector_range g_etc1s_to_atc_selector_ranges[] =
		{ 0, 3 },
		{ 1, 3 },
		{ 0, 2 },
		{ 1, 2 },
		{ 2, 3 },
		{ 0, 1 },

	const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_RANGES = sizeof(g_etc1s_to_atc_selector_ranges) / sizeof(g_etc1s_to_atc_selector_ranges[0]);

	// Reverse lookup [low][high] -> index into g_etc1s_to_atc_selector_ranges; filled in by transcoder_init_atc().
	static uint32_t g_etc1s_to_atc_selector_range_index[4][4];

	const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS = 10;
	// Candidate remappings from ETC1S selector value (column) to output selector value.
	static const uint8_t g_etc1s_to_atc_selector_mappings[NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS][4] =
		{ 0, 0, 1, 1 },
		{ 0, 0, 1, 2 },
		{ 0, 0, 1, 3 },
		{ 0, 0, 2, 3 },
		{ 0, 1, 1, 1 },
		{ 0, 1, 2, 2 },
		{ 0, 1, 2, 3 }, //6 - identity
		{ 0, 2, 3, 3 },
		{ 1, 2, 2, 2 },
		{ 1, 2, 3, 3 },

	// Generated tables, sized 32 * 8 * mappings * ranges. convert_etc1s_to_atc() indexes the ATC ones as
	// ((inten * 32 + base) * ranges + range) * mappings + mapping.
	static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_pvrtc2_45.inc"

#if 0
	static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_pvrtc2_alpha_33.inc"


	// 5-bit low endpoint, 5-bit high endpoint (used for red and blue by convert_etc1s_to_atc()).
	static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_atc_55.inc"

	// 5-bit low endpoint, 6-bit high endpoint (used for green by convert_etc1s_to_atc()).
	static const etc1s_to_atc_solution g_etc1s_to_atc_56[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_atc_56.inc"

	// A (low, high) endpoint pair, stored as raw quantized endpoint values.
	struct atc_match_entry
	{
		uint8_t m_lo;
		uint8_t m_hi;
	};
	static atc_match_entry g_pvrtc2_match45_equals_1[256], g_atc_match55_equals_1[256], g_atc_match56_equals_1[256]; // selector 1
	static atc_match_entry g_pvrtc2_match4[256], g_atc_match5[256], g_atc_match6[256];

	// Expands a quantized endpoint value to 8 bits by bit replication.
	// num_levels is the number of representable values: 16 (4-bit), 32 (5-bit); anything else is
	// treated as 6-bit, which includes the degenerate size of 1 where v is always 0.
	static inline int atc_expand_endpoint_to_8(int v, int num_levels)
	{
		if (num_levels == 16)
		{
			// This is only for PVRTC2 - expand to 5 then 8
			v = (v << 1) | (v >> 3);
			return (v << 3) | (v >> 2);
		}

		if (num_levels == 32)
			return (v << 3) | (v >> 2);

		return (v << 2) | (v >> 4);
	}

	// Fills pTable[0..255] with the endpoint pair that best reproduces each 8-bit value i.
	//   pTable - receives 256 entries.
	//   size0  - number of representable low endpoint values (1, 16 or 32 at the call sites).
	//   size1  - number of representable high endpoint values (16, 32 or 64 at the call sites).
	//   sel    - selector the block will use: 1 interpolates (5 * lo + 3 * hi) / 8, 3 uses hi directly.
	// Ties are resolved in favour of the first pair found (lowest lo, then lowest hi).
	static void prepare_atc_single_color_table(atc_match_entry* pTable, int size0, int size1, int sel)
	{
		for (int i = 0; i < 256; i++)
		{
			// Larger than any possible error, so the first candidate always wins.
			int lowest_e = 256;
			for (int lo = 0; lo < size0; lo++)
			{
				const int lo_e = atc_expand_endpoint_to_8(lo, size0);

				for (int hi = 0; hi < size1; hi++)
				{
					const int hi_e = atc_expand_endpoint_to_8(hi, size1);

					int e;

					if (sel == 1)
					{
						// Selector 1
						e = abs(((lo_e * 5 + hi_e * 3) / 8) - i);
					}
					else
					{
						assert(sel == 3);

						// Selector 3
						e = abs(hi_e - i);
					}

					if (e < lowest_e)
					{
						pTable[i].m_lo = static_cast<uint8_t>(lo);
						pTable[i].m_hi = static_cast<uint8_t>(hi);

						lowest_e = e;
					}

				} // hi
			} // lo
		} // i
	}

	// One-time setup for the ATC / PVRTC2 conversion paths: builds the six single-colour match tables
	// and the (low selector, high selector) -> range index reverse lookup.
	// NOTE(review): the function's delimiters are not visible in this excerpt.
	static void transcoder_init_atc()
		// Selector 1 tables: both endpoints vary (4:5, 5:5 and 5:6 bit endpoint pairs).
		prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1);
		prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); 
		prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); 

		// Selector 3 tables: only the high endpoint matters, so the low endpoint range is a single value.
		prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3);
		prepare_atc_single_color_table(g_atc_match5, 1, 32, 3);
		prepare_atc_single_color_table(g_atc_match6, 1, 64, 3);

		// Record each range's position so it can be looked up by [low][high]; other cells are left untouched.
		for (uint32_t i = 0; i < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; i++)
			uint32_t l = g_etc1s_to_atc_selector_ranges[i].m_low;
			uint32_t h = g_etc1s_to_atc_selector_ranges[i].m_high;
			g_etc1s_to_atc_selector_range_index[l][h] = i;

	// An 8-byte output block: two 16-bit endpoint colours, each stored low byte first, followed by
	// four selector bytes.
	struct atc_block
	{
		uint8_t m_lo[2];
		uint8_t m_hi[2];
		uint8_t m_sels[4];

		// Packs the low endpoint as r:5 g:5 b:5 (r in bits 10-14, g in bits 5-9, b in bits 0-4).
		// Each component must be below 32.
		void set_low_color(uint32_t r, uint32_t g, uint32_t b)
		{
			assert((r < 32) && (g < 32) && (b < 32));
			const uint32_t x = (r << 10) | (g << 5) | b;
			m_lo[0] = static_cast<uint8_t>(x & 0xFF);
			m_lo[1] = static_cast<uint8_t>((x >> 8) & 0xFF);
		}

		// Packs the high endpoint as r:5 g:6 b:5 (r in bits 11-15, g in bits 5-10, b in bits 0-4).
		// r and b must be below 32, g below 64.
		void set_high_color(uint32_t r, uint32_t g, uint32_t b)
		{
			assert((r < 32) && (g < 64) && (b < 32));
			const uint32_t x = (r << 11) | (g << 5) | b;
			m_hi[0] = static_cast<uint8_t>(x & 0xFF);
			m_hi[1] = static_cast<uint8_t>((x >> 8) & 0xFF);
		}
	};

	// Converts one ETC1S block (endpoint + selector) into an atc_block written to pDst.
	//   pDst       - destination, reinterpreted as atc_block*.
	//   pEndpoints - supplies the 5-bit base colour (m_color5) and intensity table index (m_inten5).
	//   pSelector  - supplies the lowest/highest selector used, the unique selector count and 4 packed selector bytes.
	// Three cases are visible: a single-selector (solid colour) block, an "extreme" two-selector block, and a
	// general table-driven search over the selector mappings.
	// NOTE(review): the delimiters, any early returns after the first two cases, the DO_ITER / DO_X macro bodies
	// and the condition choosing between the two selector write-outs at the end are not visible in this excerpt.
	static void convert_etc1s_to_atc(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
		atc_block* pBlock = static_cast<atc_block*>(pDst);

		const uint32_t low_selector = pSelector->m_lo_selector;
		const uint32_t high_selector = pSelector->m_hi_selector;

		const color32& base_color = pEndpoints->m_color5;
		const uint32_t inten_table = pEndpoints->m_inten5;

		// Case 1: every pixel uses the same selector, so the block is one solid colour.
		if (low_selector == high_selector)
			uint32_t r, g, b;
			decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);

			// Per-channel endpoint pairs from the selector-1 match tables (5:5 for red/blue, 5:6 for green).
			pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo);
			pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi);
			// 0x55 = 01010101b: every 2-bit selector in the byte is 1.
			pBlock->m_sels[0] = 0x55;
			pBlock->m_sels[1] = 0x55;
			pBlock->m_sels[2] = 0x55;
			pBlock->m_sels[3] = 0x55;

		// Case 2: highest intensity table with only selectors 0 and 3 in use - encode the two decoded
		// colours directly as the endpoints and copy the ETC1S selector bytes unchanged.
		else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
			color32 block_colors[4];
			decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);

			const uint32_t r0 = block_colors[0].r;
			const uint32_t g0 = block_colors[0].g;
			const uint32_t b0 = block_colors[0].b;

			const uint32_t r1 = block_colors[3].r;
			const uint32_t g1 = block_colors[3].g;
			const uint32_t b1 = block_colors[3].b;

			// The m_hi field of the selector-3 tables holds the closest quantized value for each channel.
			pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_atc_match5[b0].m_hi);
			pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match6[g1].m_hi, g_atc_match5[b1].m_hi);

			pBlock->m_sels[0] = pSelector->m_selectors[0];
			pBlock->m_sels[1] = pSelector->m_selectors[1];
			pBlock->m_sels[2] = pSelector->m_selectors[2];
			pBlock->m_sels[3] = pSelector->m_selectors[3];


		// Case 3: general path. Locate the per-channel table slices for this intensity table, base colour
		// and selector range; each slice holds NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS candidate solutions.
		const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];

		const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
		const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_56[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
		const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];

		uint32_t best_err = UINT_MAX;
		uint32_t best_mapping = 0;

		// Evaluates the 10 selector mappings; presumably keeps the one with the lowest combined r/g/b error
		// in best_mapping - the macro body is not visible, confirm.
#define DO_ITER
		DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
		DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
#undef DO_ITER

		pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
		pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);

			// Straight copy of the selector bytes; presumably taken only when best_mapping is the identity
			// mapping (index 6 in g_etc1s_to_atc_selector_mappings) - the guarding condition is not visible, confirm.
			pBlock->m_sels[0] = pSelector->m_selectors[0];
			pBlock->m_sels[1] = pSelector->m_selectors[1];
			pBlock->m_sels[2] = pSelector->m_selectors[2];
			pBlock->m_sels[3] = pSelector->m_selectors[3];
			// Otherwise the selectors are translated through the chosen mapping row.
			const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];

			const uint32_t sel_bits0 = pSelector->m_selectors[0];
			const uint32_t sel_bits1 = pSelector->m_selectors[1];
			const uint32_t sel_bits2 = pSelector->m_selectors[2];
			const uint32_t sel_bits3 = pSelector->m_selectors[3];

			uint32_t atc_sels0 = 0, atc_sels1 = 0, atc_sels2 = 0, atc_sels3 = 0;

			// Presumably remaps one 2-bit selector column per invocation via pSelectors_xlat - body and uses not visible.
#define DO_X

#undef DO_X

			pBlock->m_sels[0] = (uint8_t)atc_sels0;
			pBlock->m_sels[1] = (uint8_t)atc_sels1;
			pBlock->m_sels[2] = (uint8_t)atc_sels2;
			pBlock->m_sels[3] = (uint8_t)atc_sels3;

	static void create_etc1s_to_atc_conversion_tables()
		// ATC 55
		FILE* pFile = nullptr;
		fopen_s(&pFile, "basisu_transcoder_tables_atc_55.inc", "w");

		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 31; hi++)
							for (uint32_t lo = 0; lo <= 31; lo++)
								uint32_t colors[4];

								colors[0] = (lo << 3) | (lo >> 2);
								colors[3] = (hi << 3) | (hi >> 2);

								colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
								colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor 
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						//assert(best_err <= 0xFFFF);
						best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten

		pFile = nullptr;

		// ATC 56
		fopen_s(&pFile, "basisu_transcoder_tables_atc_56.inc", "w");

		n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 63; hi++)
							for (uint32_t lo = 0; lo <= 31; lo++)
								uint32_t colors[4];

								colors[0] = (lo << 3) | (lo >> 2);
								colors[3] = (hi << 2) | (hi >> 4);

								colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
								colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor 
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						//assert(best_err <= 0xFFFF);
						best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten

		// PVRTC2 45
		fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w");

		n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 31; hi++)
							for (uint32_t lo = 0; lo <= 15; lo++)
								uint32_t colors[4];

								colors[0] = (lo << 1) | (lo >> 3);
								colors[0] = (colors[0] << 3) | (colors[0] >> 2);

								colors[3] = (hi << 3) | (hi >> 2);

								colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
								colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor 
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						//assert(best_err <= 0xFFFF);
						best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten


#if 0
		// PVRTC2 34
		fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_34.inc", "w");

		n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 15; hi++)
							for (uint32_t lo = 0; lo <= 7; lo++)
								uint32_t colors[4];

								colors[0] = (lo << 2) | (lo >> 1);
								colors[0] = (colors[0] << 3) | (colors[0] >> 2);

								colors[3] = (hi << 1) | (hi >> 3);
								colors[3] = (colors[3] << 3) | (colors[3] >> 2);

								colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
								colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor 
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						//assert(best_err <= 0xFFFF);
						best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten

#if 0
		// PVRTC2 44
		fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_44.inc", "w");

		n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 15; hi++)
							for (uint32_t lo = 0; lo <= 15; lo++)
								uint32_t colors[4];

								colors[0] = (lo << 1) | (lo >> 3);
								colors[0] = (colors[0] << 3) | (colors[0] >> 2);

								colors[3] = (hi << 1) | (hi >> 3);
								colors[3] = (colors[3] << 3) | (colors[3] >> 2);

								colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
								colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor 
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						//assert(best_err <= 0xFFFF);
						best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten


		// PVRTC2 alpha 33
		fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_alpha_33.inc", "w");

		n = 0;

		for (int inten = 0; inten < 8; inten++)
			for (uint32_t g = 0; g < 32; g++)
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
					const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi <= 7; hi++)
							for (uint32_t lo = 0; lo <= 7; lo++)
								uint32_t colors[4];

								colors[0] = (lo << 1);
								colors[0] = (colors[0] << 4) | colors[0];

								colors[3] = (hi << 1) | 1;
								colors[3] = (colors[3] << 4) | colors[3];

								colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
								colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
									int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor 
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;

								if (total_err < best_err)
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;

						//assert(best_err <= 0xFFFF);
						best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten



	// One PVRTC2 block as filled in by the ETC1S->PVRTC2 converters in this file:
	// four modulation bytes plus 32 bits of packed endpoint color data.
	struct pvrtc2_block
		// Modulation (selector) bits. The converters write one byte per entry, each byte packing four 2-bit values.
		uint8_t m_modulation[4];

				// Opaque mode: RGB colora=554 and colorb=555
				// The bit-field widths below sum to 32. Color A ("low") stores blue in 4 bits, color B ("high") in 5 bits.
					uint32_t m_mod_flag : 1;
					uint32_t m_blue_a : 4;
					uint32_t m_green_a : 5;
					uint32_t m_red_a : 5;
					uint32_t m_hard_flag : 1;
					uint32_t m_blue_b : 5;
					uint32_t m_green_b : 5;
					uint32_t m_red_b : 5;
					uint32_t m_opaque_flag : 1;

				} m_opaque_color_data;

				// Transparent mode: RGBA colora=4433 and colorb=4443
				// The bit-field widths below also sum to 32. Each color carries a 3-bit alpha; color A stores blue in 3 bits, color B in 4 bits.
					uint32_t m_mod_flag : 1;
					uint32_t m_blue_a : 3;
					uint32_t m_green_a : 4;
					uint32_t m_red_a : 4;
					uint32_t m_alpha_a : 3;
					uint32_t m_hard_flag : 1;
					uint32_t m_blue_b : 4;
					uint32_t m_green_b : 4;
					uint32_t m_red_b : 4;
					uint32_t m_alpha_b : 3;
					uint32_t m_opaque_flag : 1;

				} m_trans_color_data;

			// NOTE(review): presumably a raw 32-bit view of the same bits as the two bit-field layouts above - confirm.
			uint32_t m_color_data_bits;

		// 554
		// Sets color A of the opaque layout. r and g must be 5-bit values, b a 4-bit value (checked by the assert).
		void set_low_color(uint32_t r, uint32_t g, uint32_t b)
			assert((r < 32) && (g < 32) && (b < 16));
			m_opaque_color_data.m_red_a = r;
			m_opaque_color_data.m_green_a = g;
			m_opaque_color_data.m_blue_a = b;

		// 555
		// Sets color B of the opaque layout. All three components must be 5-bit values (checked by the assert).
		void set_high_color(uint32_t r, uint32_t g, uint32_t b)
			assert((r < 32) && (g < 32) && (b < 32));
			m_opaque_color_data.m_red_b = r;
			m_opaque_color_data.m_green_b = g;
			m_opaque_color_data.m_blue_b = b;

		// 4433
		// Sets color A of the transparent layout. r and g must be 4-bit values, b and a 3-bit values (checked by the assert).
		void set_trans_low_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
			assert((r < 16) && (g < 16) && (b < 8) && (a < 8));
			m_trans_color_data.m_red_a = r;
			m_trans_color_data.m_green_a = g;
			m_trans_color_data.m_blue_a = b;
			m_trans_color_data.m_alpha_a = a;

		// 4443
		// Sets color B of the transparent layout. r, g and b must be 4-bit values, a a 3-bit value (checked by the assert).
		void set_trans_high_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
			assert((r < 16) && (g < 16) && (b < 16) && (a < 8));
			m_trans_color_data.m_red_b = r;
			m_trans_color_data.m_green_b = g;
			m_trans_color_data.m_blue_b = b;
			m_trans_color_data.m_alpha_b = a;

	// Lookup tables indexed by an 8-bit value v (0..255). They are filled in by transcoder_init_pvrtc2()
	// and consulted by convert_etc1s_to_pvrtc2_rgba() when encoding constant color/alpha blocks.

	// For each v: the 3-bit low endpoint (m_l) and 4-bit high endpoint (m_h) whose (low*5 + high*3)/8 blend,
	// after expanding each endpoint to 8 bits, is closest to v. Used for the blue channel (3-bit color A, 4-bit color B).
	static struct
		uint8_t m_l, m_h;
	} g_pvrtc2_trans_match34[256];

	// NOTE(review): presumably the 4-bit/4-bit counterpart of g_pvrtc2_trans_match34 (it is used for the red and green channels) - confirm against transcoder_init_pvrtc2().
	static struct
		uint8_t m_l, m_h;
	} g_pvrtc2_trans_match44[256];
	// For each v: the pair of 3-bit alpha endpoints whose (low*5 + high*3)/8 blend is closest to v.
	// The low alpha expands as (l << 1) and the high alpha as (h << 1) | 1, each then replicated from 4 to 8 bits.
	static struct
		uint8_t m_l, m_h;
	} g_pvrtc2_alpha_match33[256];
	// For each v: the 3-bit low alpha endpoint whose expansion alone is closest to v (m_l and m_h hold the same index).
	static struct
		uint8_t m_l, m_h;
	} g_pvrtc2_alpha_match33_0[256];

	// For each v: the 3-bit high alpha endpoint whose expansion alone is closest to v (m_l and m_h hold the same index).
	static struct
		uint8_t m_l, m_h;
	} g_pvrtc2_alpha_match33_3[256];
	// PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity.
	//
	// Converts one ETC1S color block into an opaque-layout PVRTC2 block.
	// pDst: destination, written through a pvrtc2_block pointer.
	// pEndpoints: ETC1S endpoint; its m_color5 base color and m_inten5 intensity table index are read.
	// pSelector: ETC1S selectors; the packed rows in m_selectors and the lowest/highest selector in use are read.
	static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
		pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);

		// Hard flag set, modulation flag clear, opaque flag set (554/555 color layout).
		pBlock->m_opaque_color_data.m_hard_flag = 1;
		pBlock->m_opaque_color_data.m_mod_flag = 0;
		pBlock->m_opaque_color_data.m_opaque_flag = 1;

		const uint32_t low_selector = pSelector->m_lo_selector;
		const uint32_t high_selector = pSelector->m_hi_selector;

		const color32& base_color = pEndpoints->m_color5;
		const uint32_t inten_table = pEndpoints->m_inten5;

		// Solid color block: only one selector value is in use. Take both endpoints from the "*_equals_1" match tables
		// and set every 2-bit modulation value to 1 (0x55 per row).
		if (low_selector == high_selector)
			uint32_t r, g, b;
			decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);

			pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match55_equals_1[g].m_lo, g_pvrtc2_match45_equals_1[b].m_lo);
			pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match55_equals_1[g].m_hi, g_pvrtc2_match45_equals_1[b].m_hi);

			pBlock->m_modulation[0] = 0x55;
			pBlock->m_modulation[1] = 0x55;
			pBlock->m_modulation[2] = 0x55;
			pBlock->m_modulation[3] = 0x55;

		// Extreme case: highest intensity table with only selectors 0 and 3 in use. Each extreme block color is matched
		// directly to an endpoint and the selector rows are copied through unchanged.
		else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
			color32 block_colors[4];
			decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);

			const uint32_t r0 = block_colors[0].r;
			const uint32_t g0 = block_colors[0].g;
			const uint32_t b0 = block_colors[0].b;

			const uint32_t r1 = block_colors[3].r;
			const uint32_t g1 = block_colors[3].g;
			const uint32_t b1 = block_colors[3].b;

			// Color A's blue has only 4 bits, hence the separate g_pvrtc2_match4 table for b0.
			pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_pvrtc2_match4[b0].m_hi);
			pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match5[g1].m_hi, g_atc_match5[b1].m_hi);

			pBlock->m_modulation[0] = pSelector->m_selectors[0];
			pBlock->m_modulation[1] = pSelector->m_selectors[1];
			pBlock->m_modulation[2] = pSelector->m_selectors[2];
			pBlock->m_modulation[3] = pSelector->m_selectors[3];


		// General case: per-channel precomputed endpoint solutions, indexed by intensity table, 5-bit base color component,
		// selector range and selector mapping. Blue uses its own table (g_etc1s_to_pvrtc2_45); red and green share g_etc1s_to_atc_55.
		const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];

		const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
		const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
		const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_pvrtc2_45[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];

		uint32_t best_err = UINT_MAX;
		uint32_t best_mapping = 0;

		// DO_ITER is applied to selector mappings 0..9; best_mapping then selects the table entries used for the endpoints below.
		// NOTE(review): presumably each iteration sums the three channels' table errors and keeps the lowest in best_err/best_mapping - confirm against the macro body.
#define DO_ITER
		DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
		DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
#undef DO_ITER

		pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
		pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);

			// Pass the ETC1S selector rows through unchanged.
			pBlock->m_modulation[0] = pSelector->m_selectors[0];
			pBlock->m_modulation[1] = pSelector->m_selectors[1];
			pBlock->m_modulation[2] = pSelector->m_selectors[2];
			pBlock->m_modulation[3] = pSelector->m_selectors[3];
			// TODO: We could make this faster using several precomputed 256 entry tables, like ETC1S->BC1 does.
			// Translation table for the chosen mapping (ETC1S selector -> modulation value).
			const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];

			const uint32_t sel_bits0 = pSelector->m_selectors[0];
			const uint32_t sel_bits1 = pSelector->m_selectors[1];
			const uint32_t sel_bits2 = pSelector->m_selectors[2];
			const uint32_t sel_bits3 = pSelector->m_selectors[3];

			uint32_t sels0 = 0, sels1 = 0, sels2 = 0, sels3 = 0;

			// NOTE(review): presumably DO_X remaps each 2-bit selector of the four rows through pSelectors_xlat into sels0..sels3 - confirm against the macro body.
#define DO_X

#undef DO_X

			pBlock->m_modulation[0] = (uint8_t)sels0;
			pBlock->m_modulation[1] = (uint8_t)sels1;
			pBlock->m_modulation[2] = (uint8_t)sels2;
			pBlock->m_modulation[3] = (uint8_t)sels3;

	// Plain 4-component float vector used by the PVRTC2 RGBA conversion math.
	typedef struct
	{
		float c[4];
	} vec4F;

	// Writes x into all four components of *pV and returns pV.
	static inline vec4F* vec4F_set_scalar(vec4F* pV, float x)
	{
		for (int comp = 0; comp < 4; comp++)
			pV->c[comp] = x;
		return pV;
	}

	// Writes (x, y, z, w) into components 0..3 of *pV and returns pV.
	static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w)
	{
		float* pComps = pV->c;
		pComps[0] = x;
		pComps[1] = y;
		pComps[2] = z;
		pComps[3] = w;
		return pV;
	}
	// Applies saturate() to every component of *pV in place and returns pV.
	static inline vec4F* vec4F_saturate_in_place(vec4F* pV)
	{
		for (int comp = 0; comp < 4; comp++)
			pV->c[comp] = saturate(pV->c[comp]);
		return pV;
	}

	// Returns a copy of *pV with saturate() applied to every component; *pV is left untouched.
	static inline vec4F vec4F_saturate(const vec4F* pV)
	{
		vec4F clamped;
		for (int comp = 0; comp < 4; comp++)
			clamped.c[comp] = saturate(pV->c[comp]);
		return clamped;
	}

	// Converts the four components of *pC to floats, unscaled.
	static inline vec4F vec4F_from_color(const color32* pC)
	{
		vec4F as_float;
		const float x = pC->c[0];
		const float y = pC->c[1];
		const float z = pC->c[2];
		const float w = pC->c[3];
		vec4F_set(&as_float, x, y, z, w);
		return as_float;
	}
	// Component-wise sum: *pLHS + *pRHS.
	static inline vec4F vec4F_add(const vec4F* pLHS, const vec4F* pRHS)
	{
		vec4F sum;
		for (int comp = 0; comp < 4; comp++)
			sum.c[comp] = pLHS->c[comp] + pRHS->c[comp];
		return sum;
	}

	// Component-wise difference: *pLHS - *pRHS.
	static inline vec4F vec4F_sub(const vec4F* pLHS, const vec4F* pRHS)
	{
		vec4F diff;
		for (int comp = 0; comp < 4; comp++)
			diff.c[comp] = pLHS->c[comp] - pRHS->c[comp];
		return diff;
	}

	// 4D dot product. The products are summed in component order 0, 1, 2, 3.
	static inline float vec4F_dot(const vec4F* pLHS, const vec4F* pRHS)
	{
		const float* pA = pLHS->c;
		const float* pB = pRHS->c;
		return pA[0] * pB[0] + pA[1] * pB[1] + pA[2] * pB[2] + pA[3] * pB[3];
	}

	// Returns *pLHS with every component multiplied by s.
	static inline vec4F vec4F_mul(const vec4F* pLHS, float s)
	{
		vec4F scaled;
		for (int comp = 0; comp < 4; comp++)
			scaled.c[comp] = pLHS->c[comp] * s;
		return scaled;
	}

	// Scales *pV to unit length in place and returns pV. A vector whose squared length is exactly 0 is left unchanged.
	static inline vec4F* vec4F_normalize_in_place(vec4F* pV)
	{
		const float len_sq = pV->c[0] * pV->c[0] + pV->c[1] * pV->c[1] + pV->c[2] * pV->c[2] + pV->c[3] * pV->c[3];
		if (len_sq == 0.0f)
			return pV;

		const float inv_len = 1.0f / sqrtf(len_sq);
		for (int comp = 0; comp < 4; comp++)
			pV->c[comp] *= inv_len;
		return pV;
	}

	// Expands a color holding 5-bit R, G, B and 4-bit A to 8 bits per channel by bit replication:
	// the top bits are repeated in the low bits (5 -> 8 for RGB, 4 -> 8 for alpha).
	static color32 convert_rgba_5554_to_8888(const color32& col)
	{
		const int r5 = col[0];
		const int g5 = col[1];
		const int b5 = col[2];
		const int a4 = col[3];

		return color32(
			(r5 << 3) | (r5 >> 2),
			(g5 << 3) | (g5 >> 2),
			(b5 << 3) | (b5 >> 2),
			(a4 << 4) | a4);
	}

	// Returns x squared.
	static inline int sq(int x)
	{
		const int squared = x * x;
		return squared;
	}
	// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
	// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
	// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
	//
	// Converts one ETC1S color block plus one ETC1S alpha block into a single PVRTC2 block.
	// pDst: destination pvrtc2_block. On entry its first two 16-bit words are read as the alpha endpoint index and the
	//       alpha selector index; the block is then overwritten with the PVRTC2 result.
	// pEndpoints / pSelector: ETC1S endpoint and selectors of the color plane.
	// pEndpoint_codebook / pSelector_codebook: arrays the two alpha indices are looked up in.
	static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
		pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);

		// Fetch the alpha plane's endpoint/selector via the indices stored in the destination block, before anything is written to it.
		const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pBlock)[0]];
		const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pBlock)[1]];

		// Hard flag set, modulation flag clear, opaque flag clear (4433/4443 color layout).
		pBlock->m_opaque_color_data.m_hard_flag = 1;
		pBlock->m_opaque_color_data.m_mod_flag = 0;
		pBlock->m_opaque_color_data.m_opaque_flag = 0;

		const int num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;

		const color32& alpha_base_color = alpha_endpoint.m_color5;
		const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;

		// The single alpha value shared by the whole block, or -1 if the alpha varies.
		int constant_alpha_val = -1;

		int alpha_block_colors[4];
		decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);

		// With one unique selector the alpha is trivially constant. Otherwise it is constant only when every
		// selector value between m_lo_selector and m_hi_selector decodes to the same alpha.
		if (num_unique_alpha_selectors == 1)
			constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
			constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];

			for (uint32_t i = alpha_selectors.m_lo_selector + 1; i <= alpha_selectors.m_hi_selector; i++)
				if (constant_alpha_val != alpha_block_colors[i])
					constant_alpha_val = -1;

		if (constant_alpha_val >= 250)
			// It's opaque enough, so don't bother trying to encode it as an alpha block.
			convert_etc1s_to_pvrtc2_rgb(pDst, pEndpoints, pSelector);

		const color32& base_color = pEndpoints->m_color5;
		const uint32_t inten_table = pEndpoints->m_inten5;

		const uint32_t low_selector = pSelector->m_lo_selector;
		const uint32_t high_selector = pSelector->m_hi_selector;

		const int num_unique_color_selectors = pSelector->m_num_unique_selectors;
		// We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes.
		// Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values.
		// Expand the 5-bit base color to 8 bits by bit replication.
		const int br = (base_color.r << 3) | (base_color.r >> 2);
		const int bg = (base_color.g << 3) | (base_color.g >> 2);
		const int bb = (base_color.b << 3) | (base_color.b >> 2);
		// Four-entry RGBA palette: RGB is the base color plus the intensity table delta (clamped), A comes from the alpha plane.
		color32 block_cols[4];
		for (uint32_t i = 0; i < 4; i++)
			const int ci = g_etc1_inten_tables[inten_table][i];
			block_cols[i].set_clamped(br + ci, bg + ci, bb + ci, alpha_block_colors[i]);

		// The color is solid when every palette entry between low_selector and high_selector has the same RGB.
		bool solid_color_block = true;
		if (num_unique_color_selectors > 1)
			for (uint32_t i = low_selector + 1; i <= high_selector; i++)
				if ((block_cols[low_selector].r != block_cols[i].r) || (block_cols[low_selector].g != block_cols[i].g) || (block_cols[low_selector].b != block_cols[i].b))
					solid_color_block = false;

		if ((solid_color_block) && (constant_alpha_val >= 0))
			// Constant color/alpha block.
			// This is more complex than it may seem because of the way color and alpha are packed in PVRTC2. We need to evaluate mod0, mod1 and mod3 encodings to find the best one.
			uint32_t r, g, b;
			decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);

			// Mod 0
			// Only color A is used (all modulation values 0): RGB rounded to 4/4/3 bits, alpha from the low-endpoint-only table.
			uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; 
			uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l;

			// Expand the quantized color A back to 8 bits (via 5/5/5/4 bits) so its error can be measured.
			uint32_t cr0 = (lr0 << 1) | (lr0 >> 3);
			uint32_t cg0 = (lg0 << 1) | (lg0 >> 3);
			uint32_t cb0 = (lb0 << 2) | (lb0 >> 1);
			uint32_t ca0 = (la0 << 1);
			cr0 = (cr0 << 3) | (cr0 >> 2);
			cg0 = (cg0 << 3) | (cg0 >> 2);
			cb0 = (cb0 << 3) | (cb0 >> 2);
			ca0 = (ca0 << 4) | ca0;

			// Squared error, with the alpha term weighted 2x.
			uint32_t err0 = sq(cr0 - r) + sq(cg0 - g) + sq(cb0 - b) + sq(ca0 - constant_alpha_val) * 2;

			// If the alpha is < 3 or so we're kinda screwed. It's better to have some RGB error than it is to turn a 100% transparent area slightly opaque.
			if ((err0 == 0) || (constant_alpha_val < 3))
				pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
				pBlock->set_trans_high_color(0, 0, 0, 0);

				pBlock->m_modulation[0] = 0;
				pBlock->m_modulation[1] = 0;
				pBlock->m_modulation[2] = 0;
				pBlock->m_modulation[3] = 0;

			// Mod 3
			// Only color B is used (all modulation values 3): RGB rounded to 4/4/4 bits, alpha from the high-endpoint-only table.
			uint32_t lr3 = (r * 15 + 128) / 255, lg3 = (g * 15 + 128) / 255, lb3 = (b * 15 + 128) / 255;
			uint32_t la3 = g_pvrtc2_alpha_match33_3[constant_alpha_val].m_l;

			// Expand color B back to 8 bits; its alpha always gets a 1 in the appended low bit.
			uint32_t cr3 = (lr3 << 1) | (lr3 >> 3);
			uint32_t cg3 = (lg3 << 1) | (lg3 >> 3);
			uint32_t cb3 = (lb3 << 1) | (lb3 >> 3);
			uint32_t ca3 = (la3 << 1) | 1;
			cr3 = (cr3 << 3) | (cr3 >> 2);
			cg3 = (cg3 << 3) | (cg3 >> 2);
			cb3 = (cb3 << 3) | (cb3 >> 2);
			ca3 = (ca3 << 4) | ca3;

			uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2;
			// Mod 1
			// Both endpoints are used and blended (A*5 + B*3)/8 (all modulation values 1); endpoint pairs come from the match tables.
			uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l;
			uint32_t hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h;
			uint32_t la1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_l, ha1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_h;

			uint32_t clr1 = (lr1 << 1) | (lr1 >> 3);
			uint32_t clg1 = (lg1 << 1) | (lg1 >> 3);
			uint32_t clb1 = (lb1 << 2) | (lb1 >> 1);
			uint32_t cla1 = (la1 << 1);

			clr1 = (clr1 << 3) | (clr1 >> 2);
			clg1 = (clg1 << 3) | (clg1 >> 2);
			clb1 = (clb1 << 3) | (clb1 >> 2);
			cla1 = (cla1 << 4) | cla1;

			uint32_t chr1 = (hr1 << 1) | (hr1 >> 3);
			uint32_t chg1 = (hg1 << 1) | (hg1 >> 3);
			uint32_t chb1 = (hb1 << 1) | (hb1 >> 3);
			uint32_t cha1 = (ha1 << 1) | 1;

			chr1 = (chr1 << 3) | (chr1 >> 2);
			chg1 = (chg1 << 3) | (chg1 >> 2);
			chb1 = (chb1 << 3) | (chb1 >> 2);
			cha1 = (cha1 << 4) | cha1;

			uint32_t r1 = (clr1 * 5 + chr1 * 3) / 8;
			uint32_t g1 = (clg1 * 5 + chg1 * 3) / 8;
			uint32_t b1 = (clb1 * 5 + chb1 * 3) / 8;
			uint32_t a1 = (cla1 * 5 + cha1 * 3) / 8;

			uint32_t err1 = sq(r1 - r) + sq(g1 - g) + sq(b1 - b) + sq(a1 - constant_alpha_val) * 2;

			// Emit whichever of the three encodings has the lowest error.
			if ((err1 < err0) && (err1 < err3))
				pBlock->set_trans_low_color(lr1, lg1, lb1, la1);
				pBlock->set_trans_high_color(hr1, hg1, hb1, ha1);

				pBlock->m_modulation[0] = 0x55;
				pBlock->m_modulation[1] = 0x55;
				pBlock->m_modulation[2] = 0x55;
				pBlock->m_modulation[3] = 0x55;
			else if (err0 < err3)
				pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
				pBlock->set_trans_high_color(0, 0, 0, 0);

				pBlock->m_modulation[0] = 0;
				pBlock->m_modulation[1] = 0;
				pBlock->m_modulation[2] = 0;
				pBlock->m_modulation[3] = 0;
				pBlock->set_trans_low_color(0, 0, 0, 0);
				pBlock->set_trans_high_color(lr3, lg3, lb3, la3);

				pBlock->m_modulation[0] = 0xFF;
				pBlock->m_modulation[1] = 0xFF;
				pBlock->m_modulation[2] = 0xFF;
				pBlock->m_modulation[3] = 0xFF;


		// It's a complex block with non-solid color and/or alpha pixels.
		// minColor/maxColor are the unquantized RGBA endpoints, with components in 0..1.
		vec4F minColor, maxColor;

		if (solid_color_block)
			// It's a solid color block.
			// The endpoints share the same RGB and differ only in alpha.
			uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a;
			uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a;
			const float S = 1.0f / 255.0f;
			vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S);
			vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S);
		else if (constant_alpha_val >= 0)
			// It's a solid alpha block.
			// The endpoints share the same alpha and differ only in RGB.
			const float S = 1.0f / 255.0f;
			vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S);
			vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S);
		// See if any of the block colors got clamped - if so the principal axis got distorted (it's no longer just the ETC1S luma axis).
		// To keep quality up we need to use full 4D PCA in this case.
		else	if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) ||
				(block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) ||
				(block_cols[low_selector].c[2] == 0) || (block_cols[high_selector].c[2] == 255) ||
				(block_cols[alpha_selectors.m_lo_selector].c[3] == 0) || (block_cols[alpha_selectors.m_hi_selector].c[3] == 255))
			// Find principal component of RGBA colors treated as 4D vectors.
			color32 pixels[16];

			// Decode all 16 pixels (RGB from the color plane, A from the alpha plane) and accumulate their sum.
			uint32_t sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
			for (uint32_t i = 0; i < 16; i++)
				color32 rgb(block_cols[pSelector->get_selector(i & 3, i >> 2)]);
				uint32_t a = block_cols[alpha_selectors.get_selector(i & 3, i >> 2)].a;

				pixels[i].set(rgb.r, rgb.g, rgb.b, a);

				sum_r += rgb.r;
				sum_g += rgb.g;
				sum_b += rgb.b;
				sum_a += a;

			// meanColorScaled is the mean in 0..255 units; meanColor ends up as the same mean in 0..1 units.
			vec4F meanColor;
			vec4F_set(&meanColor, (float)sum_r, (float)sum_g, (float)sum_b, (float)sum_a);
			vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / 16.0f);

			meanColor = vec4F_mul(&meanColor, 1.0f / (float)(16.0f * 255.0f));

			vec4F axis;
			vec4F_set_scalar(&axis, 0.0f);
			// Why this incremental method? Because it's stable and predictable. Covar+power method can require a lot of iterations to converge in 4D.
			for (uint32_t i = 0; i < 16; i++)
				vec4F color = vec4F_from_color(&pixels[i]);
				color = vec4F_sub(&color, &meanColorScaled);
				vec4F a = vec4F_mul(&color, color.c[0]);
				vec4F b = vec4F_mul(&color, color.c[1]);
				vec4F c = vec4F_mul(&color, color.c[2]);
				vec4F d = vec4F_mul(&color, color.c[3]);
				// The first pixel seeds the direction with itself; later pixels use the axis accumulated so far.
				vec4F n = i ? axis : color;
				axis.c[0] += vec4F_dot(&a, &n);
				axis.c[1] += vec4F_dot(&b, &n);
				axis.c[2] += vec4F_dot(&c, &n);
				axis.c[3] += vec4F_dot(&d, &n);

			// Degenerate (near-zero) axis: fall back to a fixed direction with all components equal.
			if (vec4F_dot(&axis, &axis) < .5f)
				vec4F_set_scalar(&axis, .5f);

			// Project every mean-centered pixel onto the axis and record the smallest and largest projection.
			float l = 1e+9f, h = -1e+9f;

			for (uint32_t i = 0; i < 16; i++)
				vec4F color = vec4F_from_color(&pixels[i]);

				vec4F q = vec4F_sub(&color, &meanColorScaled);
				float d = vec4F_dot(&q, &axis);

				l = basisu::minimum(l, d);
				h = basisu::maximum(h, d);

			l *= (1.0f / 255.0f);
			h *= (1.0f / 255.0f);

			// Endpoints are the mean offset along the axis by the two extents, saturated per component.
			vec4F b0 = vec4F_mul(&axis, l);
			vec4F b1 = vec4F_mul(&axis, h);
			vec4F c0 = vec4F_add(&meanColor, &b0);
			vec4F c1 = vec4F_add(&meanColor, &b1);
			minColor = vec4F_saturate(&c0);
			maxColor = vec4F_saturate(&c1);
			// Order the endpoints so that minColor carries the smaller alpha.
			if (minColor.c[3] > maxColor.c[3])
				// VS 2019 release Code Generator issue
				//std::swap(minColor, maxColor);

				// NOTE(review): the next two statements are identical; the second repeats the first and looks redundant - confirm before removing.
				float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3];
				minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
				minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
				maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d;
			// We know the RGB axis is luma, because it's an ETC1S block and none of the block colors got clamped. So we only need to use 2D PCA.
			// We project each LA vector onto two 2D lines with axes (1,1) and (1,-1) and find the largest projection to determine if axis A is flipped relative to L.
			// L is taken as R+G+B and A is scaled by 3, so both range over 0..765.
			uint32_t block_cols_l[4], block_cols_a[4];
			for (uint32_t i = 0; i < 4; i++)
				block_cols_l[i] = block_cols[i].r + block_cols[i].g + block_cols[i].b;
				block_cols_a[i] = block_cols[i].a * 3;

			// p0 = L + A is the projection onto (1,1); p1 = L - A is the projection onto (1,-1).
			int p0_min = INT_MAX, p0_max = INT_MIN;
			int p1_min = INT_MAX, p1_max = INT_MIN;
			for (uint32_t y = 0; y < 4; y++)
				// One selector byte per row: four pixels, 2 bits each.
				const uint32_t cs = pSelector->m_selectors[y];
				const uint32_t as = alpha_selectors.m_selectors[y];

					const int l = block_cols_l[cs & 3];
					const int a = block_cols_a[as & 3];
					const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
					const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
					const int l = block_cols_l[(cs >> 2) & 3];
					const int a = block_cols_a[(as >> 2) & 3];
					const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
					const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
					const int l = block_cols_l[(cs >> 4) & 3];
					const int a = block_cols_a[(as >> 4) & 3];
					const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
					const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
					const int l = block_cols_l[cs >> 6];
					const int a = block_cols_a[as >> 6];
					const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
					const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);

			// Extent of the pixels along each of the two candidate axes.
			int dist0 = p0_max - p0_min;
			int dist1 = p1_max - p1_min;

			const float S = 1.0f / 255.0f;

			vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, block_cols[alpha_selectors.m_lo_selector].a * S);
			vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, block_cols[alpha_selectors.m_hi_selector].a * S);

			// See if the A component of the principal axis is flipped relative to L. If so, we need to flip either RGB or A bounds.
			if (dist1 > dist0)
				std::swap(minColor.c[0], maxColor.c[0]);
				std::swap(minColor.c[1], maxColor.c[1]);
				std::swap(minColor.c[2], maxColor.c[2]);

		// 4433 4443
		// Quantize the endpoints with rounding: color A to 4/4/3/3 bits, color B to 4/4/4/3 bits.
		color32 trialMinColor, trialMaxColor;
		trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f));
		trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f));
		pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a);
		pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a);

		// Expand both quantized endpoints to 5/5/5/4 bits (alpha A gets a 0 low bit, alpha B a 1 low bit), then to 8 bits per channel.
		color32 color_a((trialMinColor.r << 1) | (trialMinColor.r >> 3), (trialMinColor.g << 1) | (trialMinColor.g >> 3), (trialMinColor.b << 2) | (trialMinColor.b >> 1), trialMinColor.a << 1);
		color32 color_b((trialMaxColor.r << 1) | (trialMaxColor.r >> 3), (trialMaxColor.g << 1) | (trialMaxColor.g >> 3), (trialMaxColor.b << 1) | (trialMaxColor.b >> 3), (trialMaxColor.a << 1) | 1);

		color32 color0(convert_rgba_5554_to_8888(color_a));
		color32 color3(convert_rgba_5554_to_8888(color_b));

		const int lr = color0.r;
		const int lg = color0.g;
		const int lb = color0.b;
		const int la = color0.a;

		// Axis from the expanded color A to the expanded color B, and its squared length.
		const int axis_r = color3.r - lr;
		const int axis_g = color3.g - lg;
		const int axis_b = color3.b - lb;
		const int axis_a = color3.a - la;
		const int len_a = (axis_r * axis_r) + (axis_g * axis_g) + (axis_b * axis_b) + (axis_a * axis_a);

		// Decision thresholds at 3/16, 1/2 and 13/16 of the squared axis length: the midpoints between blend positions 0, 3/8, 5/8 and 1.
		const int thresh01 = (len_a * 3) / 16;
		const int thresh12 = len_a >> 1;
		const int thresh23 = (len_a * 13) / 16;

		// The RGB endpoints are identical: classify on alpha alone, once per palette entry, then translate the alpha selectors row by row.
		if ((axis_r | axis_g | axis_b) == 0)
			int ca_sel[4];

			for (uint32_t i = 0; i < 4; i++)
				int ca = (block_cols[i].a - la) * axis_a;
				ca_sel[i] = (ca >= thresh23) + (ca >= thresh12) + (ca >= thresh01);

			for (uint32_t y = 0; y < 4; y++)
				const uint32_t a_sels = alpha_selectors.m_selectors[y];

				uint32_t sel = ca_sel[a_sels & 3] | (ca_sel[(a_sels >> 2) & 3] << 2) | (ca_sel[(a_sels >> 4) & 3] << 4) | (ca_sel[a_sels >> 6] << 6);

				pBlock->m_modulation[y] = (uint8_t)sel;
			// General case: precompute the RGB and alpha parts of the projection per palette entry, then add them per pixel.
			int cy[4], ca[4];

			for (uint32_t i = 0; i < 4; i++)
				cy[i] = (block_cols[i].r - lr) * axis_r + (block_cols[i].g - lg) * axis_g + (block_cols[i].b - lb) * axis_b;
				ca[i] = (block_cols[i].a - la) * axis_a;

			for (uint32_t y = 0; y < 4; y++)
				const uint32_t c_sels = pSelector->m_selectors[y];
				const uint32_t a_sels = alpha_selectors.m_selectors[y];

				// Projection of each of the row's four pixels onto the endpoint axis.
				const int d0 = cy[c_sels & 3] + ca[a_sels & 3];
				const int d1 = cy[(c_sels >> 2) & 3] + ca[(a_sels >> 2) & 3];
				const int d2 = cy[(c_sels >> 4) & 3] + ca[(a_sels >> 4) & 3];
				const int d3 = cy[c_sels >> 6] + ca[a_sels >> 6];

				// Each pixel's modulation value (0..3) is the number of thresholds its projection reaches.
				uint32_t sel = ((d0 >= thresh23) + (d0 >= thresh12) + (d0 >= thresh01)) |
					(((d1 >= thresh23) + (d1 >= thresh12) + (d1 >= thresh01)) << 2) |
					(((d2 >= thresh23) + (d2 >= thresh12) + (d2 >= thresh01)) << 4) |
					(((d3 >= thresh23) + (d3 >= thresh12) + (d3 >= thresh01)) << 6);

				pBlock->m_modulation[y] = (uint8_t)sel;
	static void transcoder_init_pvrtc2()
		for (uint32_t v = 0; v < 256; v++)
			int best_l = 0, best_h = 0, lowest_err = INT_MAX;

			for (uint32_t l = 0; l < 8; l++)
				uint32_t le = (l << 1);
				le = (le << 4) | le;

				for (uint32_t h = 0; h < 8; h++)
					uint32_t he = (h << 1) | 1;
					he = (he << 4) | he;

					uint32_t m = (le * 5 + he * 3) / 8;

					int err = (int)labs((int)v - (int)m);
					if (err < lowest_err)
						lowest_err = err;
						best_l = l;
						best_h = h;

			g_pvrtc2_alpha_match33[v].m_l = (uint8_t)best_l;
			g_pvrtc2_alpha_match33[v].m_h = (uint8_t)best_h;

		for (uint32_t v = 0; v < 256; v++)
			int best_l = 0, best_h = 0, lowest_err = INT_MAX;

			for (uint32_t l = 0; l < 8; l++)
				uint32_t le = (l << 1);
				le = (le << 4) | le;

				int err = (int)labs((int)v - (int)le);
				if (err < lowest_err)
					lowest_err = err;
					best_l = l;
					best_h = l;

			g_pvrtc2_alpha_match33_0[v].m_l = (uint8_t)best_l;
			g_pvrtc2_alpha_match33_0[v].m_h = (uint8_t)best_h;

		for (uint32_t v = 0; v < 256; v++)
			int best_l = 0, best_h = 0, lowest_err = INT_MAX;

			for (uint32_t h = 0; h < 8; h++)
				uint32_t he = (h << 1) | 1;
				he = (he << 4) | he;

				int err = (int)labs((int)v - (int)he);
				if (err < lowest_err)
					lowest_err = err;
					best_l = h;
					best_h = h;

			g_pvrtc2_alpha_match33_3[v].m_l = (uint8_t)best_l;
			g_pvrtc2_alpha_match33_3[v].m_h = (uint8_t)best_h;

		for (uint32_t v = 0; v < 256; v++)
			int best_l = 0, best_h = 0, lowest_err = INT_MAX;

			for (uint32_t l = 0; l < 8; l++)
				uint32_t le = (l << 2) | (l >> 1);
				le = (le << 3) | (le >> 2);

				for (uint32_t h = 0; h < 16; h++)
					uint32_t he = (h << 1) | (h >> 3);
					he = (he << 3) | (he >> 2);

					uint32_t m = (le * 5 + he * 3) / 8;

					int err = (int)labs((int)v - (int)m);
					if (err < lowest_err)
						lowest_err = err;
						best_l = l;
						best_h = h;

			g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l;
			g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h;
		for (uint32_t v = 0; v < 256; v++)
			int best_l = 0, best_h = 0, lowest_err = INT_MAX;

			for (uint32_t l = 0; l < 16; l++)
				uint32_t le = (l << 1) | (l >> 3);
				le = (le << 3) | (le >> 2);

				for (uint32_t h = 0; h < 16; h++)
					uint32_t he = (h << 1) | (h >> 3);
					he = (he << 3) | (he >> 2);

					uint32_t m = (le * 5 + he * 3) / 8;

					int err = (int)labs((int)v - (int)m);
					if (err < lowest_err)
						lowest_err = err;
						best_l = l;
						best_h = h;

			g_pvrtc2_trans_match44[v].m_l = (uint8_t)best_l;
			g_pvrtc2_trans_match44[v].m_h = (uint8_t)best_h;

	basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :{}

	bool basisu_lowlevel_etc1s_transcoder::decode_palettes(
		uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size,
		uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size)

	bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size)

	bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
		uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
		basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels)

	bool basis_validate_output_buffer_size(transcoder_texture_format target_format,
		uint32_t output_blocks_buf_size_in_blocks_or_pixels,
		uint32_t orig_width, uint32_t orig_height,
		uint32_t output_row_pitch_in_blocks_or_pixels,
		uint32_t output_rows_in_pixels,
		uint32_t total_slice_blocks)

	bool basisu_lowlevel_etc1s_transcoder::transcode_image(
			transcoder_texture_format target_format,
			void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
			const uint8_t* pCompressed_data, uint32_t compressed_data_length,
			uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
			uint32_t rgb_offset, uint32_t rgb_length, uint32_t alpha_offset, uint32_t alpha_length,
			uint32_t decode_flags,
			bool basis_file_has_alpha_slices,
			bool is_video,
			uint32_t output_row_pitch_in_blocks_or_pixels,
			basisu_transcoder_state* pState,
			uint32_t output_rows_in_pixels)

	bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
        uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
		basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
	bool basisu_lowlevel_uastc_transcoder::transcode_image(
		transcoder_texture_format target_format,
		void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
		const uint8_t* pCompressed_data, uint32_t compressed_data_length,
		uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
		uint32_t slice_offset, uint32_t slice_length,
		uint32_t decode_flags,
		bool has_alpha,
		bool is_video,
		uint32_t output_row_pitch_in_blocks_or_pixels,
		basisu_transcoder_state* pState,
		uint32_t output_rows_in_pixels,
		int channel0, int channel1)
	basisu_transcoder::basisu_transcoder() :{}

	bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const

	bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const

	bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const

	basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const

	bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const

	uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const

	basis_tex_format basisu_transcoder::get_tex_format(const void* pData, uint32_t data_size) const

	bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const

	uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const

	bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const

	bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const

	bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const
	bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size)

	bool basisu_transcoder::stop_transcoding()

	bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt,
		uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const

	int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const

	int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const

	void basisu_transcoder::write_opaque_alpha_blocks(
		uint32_t num_blocks_x, uint32_t num_blocks_y,
		void* pOutput_blocks, block_format fmt,
		uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels)

	bool basisu_transcoder::transcode_image_level(
		const void* pData, uint32_t data_size,
		uint32_t image_index, uint32_t level_index,
		void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
		transcoder_texture_format fmt,
		uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const

	uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt)

	const char* basis_get_format_name(transcoder_texture_format fmt)

	const char* basis_get_block_format_name(block_format fmt)

	const char* basis_get_texture_type_name(basis_texture_type tex_type)

	bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)

	basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt)

	bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type)

	bool basis_block_format_is_uncompressed(block_format blk_fmt)
	uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt)
	uint32_t basis_get_block_width(transcoder_texture_format tex_type)

	uint32_t basis_get_block_height(transcoder_texture_format tex_type)
	bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
	{}#if BASISD_SUPPORT_UASTCconst astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] =const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] =const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] =const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] =const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] =uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k)
	{}static const uint8_t g_zero_pattern[16] =const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] =const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] =const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] =const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] =const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] =const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] =const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] =static const uint8_t g_uastc_huff_modes[128] =const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] =const uint8_t g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] =const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] =int astc_get_levels(int range)
	{}astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]static struct
	{} g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] =bool astc_is_valid_endpoint_range(uint32_t range)
	{}uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range)
	{}uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range)
	{}const uint32_t g_bc7_weights1[2] =const uint32_t g_bc7_weights2[4] =const uint32_t g_bc7_weights3[8] =const uint32_t g_bc7_weights4[16] =const uint32_t g_astc_weights4[16] =const uint32_t g_astc_weights5[32] =const uint32_t g_astc_weights_3levels[3] =const uint8_t g_bc7_partition1[16] =const uint8_t g_bc7_partition2[64 * 16] =const uint8_t g_bc7_partition3[64 * 16] =const uint8_t g_bc7_table_anchor_index_second_subset[64] =const uint8_t g_bc7_table_anchor_index_third_subset_1[64] =const uint8_t g_bc7_table_anchor_index_third_subset_2[64] =const uint8_t g_bc7_num_subsets[8] =const uint8_t g_bc7_partition_bits[8] =const uint8_t g_bc7_color_index_bitcount[8] =const uint8_t g_bc7_mode_has_p_bits[8] =const uint8_t g_bc7_mode_has_shared_p_bits[8] =const uint8_t g_bc7_color_precision_table[8] =const int8_t g_bc7_alpha_precision_table[8] =const uint8_t g_bc7_alpha_index_bitcount[8] =endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]endpoint_err g_bc7_mode_5_optimal_endpoints[256]static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs)
	{}void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults)
	{}static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize)
	{}void pack_astc_solid_block(void* pDst_block, const color32& color)
	{}#ifdef _DEBUG#endifstatic const uint8_t g_astc_quint_encode[125] =static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
	{}static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range)
	{}const uint32_t ASTC_BLOCK_MODE_BITS =const uint32_t ASTC_PART_BITS =const uint32_t ASTC_CEM_BITS =const uint32_t ASTC_PARTITION_INDEX_BITS =const uint32_t ASTC_CCS_BITS =const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] =bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode)
	{}const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern)
	{}static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset)
	{}static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
	{}inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
	{}static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
	{}bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
	{}static const uint32_t* g_astc_weight_tables[6] =bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb)
	{}bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb)
	{}bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb)
	{}static void determine_shared_pbits(
		uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
		color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
	{}static void determine_unique_pbits(
		uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
		color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
	{}bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst)
	{}bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk)
	{}bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk)
	{}bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst)
	{}color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock)
	{}static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock)
	{}static const uint8_t s_etc1_solid_selectors[4][4] =struct etc_coord2
	{}const etc_coord2 g_etc1_pixel_coords[2][2][8] =void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
	{}bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst)
	{}static inline int gray_distance2(const uint8_t c, int y)
	{}static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels,
		int& upper_avg, int& lower_avg, int& left_avg, int& right_avg)
	{}static const uint16_t g_etc1_y_solid_block_configs[256] =static const uint16_t g_etc1_y_solid_block_4i_configs[256] =static const uint16_t g_etc1_y_solid_block_2i_configs[256] =static const uint16_t g_etc1_y_solid_block_1i_configs[256] =bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel)
	{}const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR =, ETC2_EAC_MAX_VALUE_SELECTOR =void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
	{}bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst)
	{}static const uint8_t s_uastc5_to_bc1[32] =static const uint8_t s_uastc4_to_bc1[16] =static const uint8_t s_uastc3_to_bc1[8] =static const uint8_t s_uastc2_to_bc1[4] =static const uint8_t s_uastc1_to_bc1[2] =const uint8_t* s_uastc_to_bc1_weights[6] =void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride)
	{}static void bc1_find_sels(const color32 *pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
	{}static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
	{}struct vec3F {}static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh)
	{}void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) 
	{}static inline uint8_t to_5(uint32_t v) {}static inline uint8_t to_6(uint32_t v) {}void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags)
	{}void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags)
	{}void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst)
	{}void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality)
	{}bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality)
	{}static void write_bc4_solid_block(uint8_t* pDst, uint32_t a)
	{}bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality)
	{}bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
	{}bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
	{}static const uint8_t s_etc2_eac_bit_ofs[16] =static void pack_eac_solid_block(eac_block& blk, uint32_t a)
	{}static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
	{}static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
	{}bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
	{}bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
	{}static void fixup_pvrtc1_4_modulation_rgb(
		const uastc_block* pSrc_blocks,
		const uint32_t* pPVRTC_endpoints,
		void* pDst_blocks,
		uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha)
	{}static void fixup_pvrtc1_4_modulation_rgba(
		const uastc_block* pSrc_blocks,
		const uint32_t* pPVRTC_endpoints,
		void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
	{}bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha)
	{}bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality)
	{}void uastc_init()
	{}#endif // #if BASISD_SUPPORT_UASTC#if BASISD_SUPPORT_KTX2const uint8_t g_ktx2_file_identifier[12] =ktx2_transcoder::ktx2_transcoder() :{}void ktx2_transcoder::clear()
	{}bool ktx2_transcoder::init(const void* pData, uint32_t data_size)
	{}uint32_t ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
	{}const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const
	{}bool ktx2_transcoder::start_transcoding()
	{}bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
	{}bool ktx2_transcoder::transcode_image_level(
		uint32_t level_index, uint32_t layer_index, uint32_t face_index, 
		void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
		basist::transcoder_texture_format fmt,
		uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1,
		ktx2_transcoder_state* pState)
	{}bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data)
	{}bool ktx2_transcoder::decompress_etc1s_global_data()
	{}bool ktx2_transcoder::read_key_values()
	{}#endif // BASISD_SUPPORT_KTX2bool basisu_transcoder_supports_ktx2()
	{}bool basisu_transcoder_supports_ktx2_zstd()