Commits

ecsv  committed 0139889

Replace patented S3TC algorithm with S2TC 57881ec6

  • Participants
  • Parent commits 7d6f097

Comments (0)

Files changed (12)

File projects/unix/Makefile

 	$(SRCDIR)/GlideHQ/TxReSample.cpp \
 	$(SRCDIR)/GlideHQ/TxDbg.cpp \
 	$(SRCDIR)/GlideHQ/tc-1.1+/fxt1.c \
-	$(SRCDIR)/GlideHQ/tc-1.1+/dxtn.c \
 	$(SRCDIR)/GlideHQ/tc-1.1+/wrapper.c \
 	$(SRCDIR)/GlideHQ/tc-1.1+/texstore.c
 
   ifeq ($(TXCDXTN), 1)
     CPPFLAGS += -DTXCDXTN_EXTERNAL
   else 
-    SOURCE += $(SRCDIR)/GlideHQ/tc-1.1+/dxtn.c
+    SOURCE += \
+	$(SRCDIR)/GlideHQ/tc-1.1+/s2tc/s2tc_algorithm.cpp \
+	$(SRCDIR)/GlideHQ/tc-1.1+/s2tc/s2tc_libtxc_dxtn.cpp
   endif
 endif
 

File src/GlideHQ/tc-1.1+/dxtn.c

-/*
- * DXTn codec
- * Version:  1.1
- *
- * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
- *
- * this is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * this is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING.  If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
- */
-
-/* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
- * Added support for ARGB inputs, DXT3,5 workaround for ATI Radeons, and
- * YUV conversions to determine representative colors.
- */
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include <stdio.h>
-
-#include "types.h"
-#include "internal.h"
-#include "dxtn.h"
-
-
-/***************************************************************************\
- * DXTn encoder
- *
- * The encoder was built by reversing the decoder,
- * and is vaguely based on FXT1 codec. Note that this code
- * is merely a proof of concept, since it is highly UNoptimized!
-\***************************************************************************/
-
-
-#define MAX_COMP 4 /* ever needed maximum number of components in texel */
-#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
-#define N_TEXELS 16 /* number of texels in a block (always 16) */
-#define COLOR565(v) (word)((((v)[RCOMP] & 0xf8) << 8) | (((v)[GCOMP] & 0xfc) << 3) | ((v)[BCOMP] >> 3))
-
-
-static const int dxtn_color_tlat[2][4] = {
-    { 0, 2, 3, 1 },
-    { 0, 2, 1, 3 }
-};
-
-static const int dxtn_alpha_tlat[2][8] = {
-    { 0, 2, 3, 4, 5, 6, 7, 1 },
-    { 0, 2, 3, 4, 5, 1, 6, 7 }
-};
-
-
-static void
-dxt1_rgb_quantize (dword *cc, const byte *lines[], int comps)
-{
-    float b, iv[MAX_COMP];   /* interpolation vector */
-
-    dword hi; /* high doubleword */
-    int color0, color1;
-    int n_vect;
-    const int n_comp = 3;
-    int black = 0;
-
-#ifndef YUV
-    int minSum = 2000; /* big enough */
-#else
-    int minSum = 2000000;
-#endif
-    int maxSum = -1; /* small enough */
-    int minCol = 0; /* phoudoin: silent compiler! */
-    int maxCol = 0; /* phoudoin: silent compiler! */
-
-    byte input[N_TEXELS][MAX_COMP];
-    int i, k, l;
-
-    /* make the whole block opaque */
-    /* we will NEVER reference ACOMP of any pixel */
-
-    /* 4 texels each line */
-#ifndef ARGB
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-	    for (i = 0; i < comps; i++) {
-		input[k + l * 4][i] = *lines[l]++;
-	    }
-	}
-    }
-#else
-    /* H.Morii - support for ARGB inputs */
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-          input[k + l * 4][2] = *lines[l]++;
-          input[k + l * 4][1] = *lines[l]++;
-          input[k + l * 4][0] = *lines[l]++;
-          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
-	}
-    }
-#endif
-
-    /* Our solution here is to find the darkest and brightest colors in
-     * the 4x4 tile and use those as the two representative colors.
-     * There are probably better algorithms to use (histogram-based).
-     */
-    for (k = 0; k < N_TEXELS; k++) {
-	int sum = 0;
-#ifndef YUV
-	for (i = 0; i < n_comp; i++) {
-	    sum += input[k][i];
-	}
-#else
-        /* RGB to YUV conversion according to CCIR 601 specs
-         * Y = 0.299R+0.587G+0.114B
-         * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
-         * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
-         */
-        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
-#endif
-	if (minSum > sum) {
-	    minSum = sum;
-	    minCol = k;
-	}
-	if (maxSum < sum) {
-	    maxSum = sum;
-	    maxCol = k;
-	}
-	if (sum == 0) {
-	    black = 1;
-	}
-    }
-
-    color0 = COLOR565(input[minCol]);
-    color1 = COLOR565(input[maxCol]);
-
-    if (color0 == color1) {
-	/* we'll use 3-vector */
-	cc[0] = color0 | (color1 << 16);
-	hi = black ? -1 : 0;
-    } else {
-	if (black && ((color0 == 0) || (color1 == 0))) {
-	    /* we still can use 4-vector */
-	    black = 0;
-	}
-
-	if (black ^ (color0 <= color1)) {
-	    int aux;
-	    aux = color0;
-	    color0 = color1;
-	    color1 = aux;
-	    aux = minCol;
-	    minCol = maxCol;
-	    maxCol = aux;
-	}
-	n_vect = (color0 <= color1) ? 2 : 3;
-
-	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
-	/* add in texels */
-	cc[0] = color0 | (color1 << 16);
-	hi = 0;
-	for (k = N_TEXELS - 1; k >= 0; k--) {
-	    int texel = 3;
-	    int sum = 0;
-	    if (black) {
-		for (i = 0; i < n_comp; i++) {
-		    sum += input[k][i];
-		}
-	    }
-	    if (!black || sum) {
-		/* interpolate color */
-		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
-		texel = dxtn_color_tlat[black][texel];
-	    }
-	    /* add in texel */
-	    hi <<= 2;
-	    hi |= texel;
-	}
-    }
-    cc[1] = hi;
-}
-
-
-static void
-dxt1_rgba_quantize (dword *cc, const byte *lines[], int comps)
-{
-    float b, iv[MAX_COMP];	/* interpolation vector */
-
-    dword hi;		/* high doubleword */
-    int color0, color1;
-    int n_vect;
-    const int n_comp = 3;
-    int transparent = 0;
-
-#ifndef YUV
-    int minSum = 2000;          /* big enough */
-#else
-    int minSum = 2000000;
-#endif
-    int maxSum = -1;		/* small enough */
-    int minCol = 0;		/* phoudoin: silent compiler! */
-    int maxCol = 0;		/* phoudoin: silent compiler! */
-
-    byte input[N_TEXELS][MAX_COMP];
-    int i, k, l;
-
-    if (comps == 3) {
-	/* make the whole block opaque */
-	memset(input, -1, sizeof(input));
-    }
-
-    /* 4 texels each line */
-#ifndef ARGB
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-	    for (i = 0; i < comps; i++) {
-		input[k + l * 4][i] = *lines[l]++;
-	    }
-	}
-    }
-#else
-    /* H.Morii - support for ARGB inputs */
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-          input[k + l * 4][2] = *lines[l]++;
-          input[k + l * 4][1] = *lines[l]++;
-          input[k + l * 4][0] = *lines[l]++;
-          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
-	}
-    }
-#endif
-
-    /* Our solution here is to find the darkest and brightest colors in
-     * the 4x4 tile and use those as the two representative colors.
-     * There are probably better algorithms to use (histogram-based).
-     */
-    for (k = 0; k < N_TEXELS; k++) {
-	int sum = 0;
-#ifndef YUV
-	for (i = 0; i < n_comp; i++) {
-	    sum += input[k][i];
-	}
-#else
-        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
-#endif
-	if (minSum > sum) {
-	    minSum = sum;
-	    minCol = k;
-	}
-	if (maxSum < sum) {
-	    maxSum = sum;
-	    maxCol = k;
-	}
-	if (input[k][ACOMP] < 128) {
-	    transparent = 1;
-	}
-    }
-
-    color0 = COLOR565(input[minCol]);
-    color1 = COLOR565(input[maxCol]);
-
-    if (color0 == color1) {
-	/* we'll use 3-vector */
-	cc[0] = color0 | (color1 << 16);
-	hi = transparent ? -1 : 0;
-    } else {
-	if (transparent ^ (color0 <= color1)) {
-	    int aux;
-	    aux = color0;
-	    color0 = color1;
-	    color1 = aux;
-	    aux = minCol;
-	    minCol = maxCol;
-	    maxCol = aux;
-	}
-	n_vect = (color0 <= color1) ? 2 : 3;
-
-	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
-	/* add in texels */
-	cc[0] = color0 | (color1 << 16);
-	hi = 0;
-	for (k = N_TEXELS - 1; k >= 0; k--) {
-	    int texel = 3;
-	    if (input[k][ACOMP] >= 128) {
-		/* interpolate color */
-		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
-		texel = dxtn_color_tlat[transparent][texel];
-	    }
-	    /* add in texel */
-	    hi <<= 2;
-	    hi |= texel;
-	}
-    }
-    cc[1] = hi;
-}
-
-
-static void
-dxt3_rgba_quantize (dword *cc, const byte *lines[], int comps)
-{
-    float b, iv[MAX_COMP];	/* interpolation vector */
-
-    dword lolo, lohi;	/* low quadword: lo dword, hi dword */
-    dword hihi;		/* high quadword: high dword */
-    int color0, color1;
-    const int n_vect = 3;
-    const int n_comp = 3;
-
-#ifndef YUV
-    int minSum = 2000;          /* big enough */
-#else
-    int minSum = 2000000;
-#endif
-    int maxSum = -1;		/* small enough */
-    int minCol = 0;		/* phoudoin: silent compiler! */
-    int maxCol = 0;		/* phoudoin: silent compiler! */
-
-    byte input[N_TEXELS][MAX_COMP];
-    int i, k, l;
-
-    if (comps == 3) {
-	/* make the whole block opaque */
-	memset(input, -1, sizeof(input));
-    }
-
-    /* 4 texels each line */
-#ifndef ARGB
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-	    for (i = 0; i < comps; i++) {
-		input[k + l * 4][i] = *lines[l]++;
-	    }
-	}
-    }
-#else
-    /* H.Morii - support for ARGB inputs */
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-          input[k + l * 4][2] = *lines[l]++;
-          input[k + l * 4][1] = *lines[l]++;
-          input[k + l * 4][0] = *lines[l]++;
-          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
-	}
-    }
-#endif
-
-    /* Our solution here is to find the darkest and brightest colors in
-     * the 4x4 tile and use those as the two representative colors.
-     * There are probably better algorithms to use (histogram-based).
-     */
-    for (k = 0; k < N_TEXELS; k++) {
-	int sum = 0;
-#ifndef YUV
-	for (i = 0; i < n_comp; i++) {
-	    sum += input[k][i];
-	}
-#else
-        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
-#endif
-	if (minSum > sum) {
-	    minSum = sum;
-	    minCol = k;
-	}
-	if (maxSum < sum) {
-	    maxSum = sum;
-	    maxCol = k;
-	}
-    }
-
-    /* add in alphas */
-    lolo = lohi = 0;
-    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
-	/* add in alpha */
-	lohi <<= 4;
-	lohi |= input[k][ACOMP] >> 4;
-    }
-    cc[1] = lohi;
-    for (; k >= 0; k--) {
-	/* add in alpha */
-	lolo <<= 4;
-	lolo |= input[k][ACOMP] >> 4;
-    }
-    cc[0] = lolo;
-
-    color0 = COLOR565(input[minCol]);
-    color1 = COLOR565(input[maxCol]);
-
-#ifdef RADEON
-    /* H.Morii - Workaround for ATI Radeon
-     * According to the OpenGL EXT_texture_compression_s3tc specs,
-     * the encoding of the RGB components for DXT3 and DXT5 formats
-     * use the non-transparent encodings of DXT1 but treated as
-     * though color0 > color1, regardless of the actual values of
-     * color0 and color1. ATI Radeons however require the values to
-     * be color0 > color1.
-     */
-    if (color0 < color1) {
-	int aux;
-	aux = color0;
-	color0 = color1;
-	color1 = aux;
-	aux = minCol;
-	minCol = maxCol;
-	maxCol = aux;
-    }
-#endif
-
-    cc[2] = color0 | (color1 << 16);
-
-    hihi = 0;
-    if (color0 != color1) {
-	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
-	/* add in texels */
-	for (k = N_TEXELS - 1; k >= 0; k--) {
-	    int texel;
-	    /* interpolate color */
-	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
-	    texel = dxtn_color_tlat[0][texel];
-	    /* add in texel */
-	    hihi <<= 2;
-	    hihi |= texel;
-	}
-    }
-    cc[3] = hihi;
-}
-
-
-static void
-dxt5_rgba_quantize (dword *cc, const byte *lines[], int comps)
-{
-    float b, iv[MAX_COMP];	/* interpolation vector */
-
-    qword lo;			/* low quadword */
-    dword hihi;		/* high quadword: high dword */
-    int color0, color1;
-    const int n_vect = 3;
-    const int n_comp = 3;
-
-#ifndef YUV
-    int minSum = 2000;          /* big enough */
-#else
-    int minSum = 2000000;
-#endif
-    int maxSum = -1;		/* small enough */
-    int minCol = 0;		/* phoudoin: silent compiler! */
-    int maxCol = 0;		/* phoudoin: silent compiler! */
-    int alpha0 = 2000;		/* big enough */
-    int alpha1 = -1;		/* small enough */
-    int anyZero = 0, anyOne = 0;
-    int a_vect;
-
-    byte input[N_TEXELS][MAX_COMP];
-    int i, k, l;
-
-    if (comps == 3) {
-	/* make the whole block opaque */
-	memset(input, -1, sizeof(input));
-    }
-
-    /* 4 texels each line */
-#ifndef ARGB
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-	    for (i = 0; i < comps; i++) {
-		input[k + l * 4][i] = *lines[l]++;
-	    }
-	}
-    }
-#else
-    /* H.Morii - support for ARGB inputs */
-    for (l = 0; l < 4; l++) {
-	for (k = 0; k < 4; k++) {
-          input[k + l * 4][2] = *lines[l]++;
-          input[k + l * 4][1] = *lines[l]++;
-          input[k + l * 4][0] = *lines[l]++;
-          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
-	}
-    }
-#endif
-
-    /* Our solution here is to find the darkest and brightest colors in
-     * the 4x4 tile and use those as the two representative colors.
-     * There are probably better algorithms to use (histogram-based).
-     */
-    for (k = 0; k < N_TEXELS; k++) {
-	int sum = 0;
-#ifndef YUV
-	for (i = 0; i < n_comp; i++) {
-	    sum += input[k][i];
-	}
-#else
-        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
-#endif
-	if (minSum > sum) {
-	    minSum = sum;
-	    minCol = k;
-	}
-	if (maxSum < sum) {
-	    maxSum = sum;
-	    maxCol = k;
-	}
-	if (alpha0 > input[k][ACOMP]) {
-	    alpha0 = input[k][ACOMP];
-	}
-	if (alpha1 < input[k][ACOMP]) {
-	    alpha1 = input[k][ACOMP];
-	}
-	if (input[k][ACOMP] == 0) {
-	    anyZero = 1;
-	}
-	if (input[k][ACOMP] == 255) {
-	    anyOne = 1;
-	}
-    }
-
-    /* add in alphas */
-    if (alpha0 == alpha1) {
-	/* we'll use 6-vector */
-	cc[0] = alpha0 | (alpha1 << 8);
-	cc[1] = 0;
-    } else {
-	if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) {
-	    /* we still might use 8-vector */
-	    anyZero = 0;
-	}
-	if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) {
-	    /* we still might use 8-vector */
-	    anyOne = 0;
-	}
-	if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) {
-	    int aux;
-	    aux = alpha0;
-	    alpha0 = alpha1;
-	    alpha1 = aux;
-	}
-	a_vect = (alpha0 <= alpha1) ? 5 : 7;
-
-	/* compute interpolation vector */
-	iv[ACOMP] = (float)a_vect / (alpha1 - alpha0);
-	b = -iv[ACOMP] * alpha0 + 0.5F;
-
-	/* add in alphas */
-	Q_MOV32(lo, 0);
-	for (k = N_TEXELS - 1; k >= 0; k--) {
-	    int texel = -1;
-	    if (anyZero | anyOne) {
-		if (input[k][ACOMP] == 0) {
-		    texel = 6;
-		} else if (input[k][ACOMP] == 255) {
-		    texel = 7;
-		}
-	    }
-	    /* interpolate alpha */
-	    if (texel == -1) {
-		float dot = input[k][ACOMP] * iv[ACOMP];
-		texel = (int)(dot + b);
-#if SAFECDOT
-		if (texel < 0) {
-		    texel = 0;
-		} else if (texel > a_vect) {
-		    texel = a_vect;
-		}
-#endif
-		texel = dxtn_alpha_tlat[anyZero | anyOne][texel];
-	    }
-	    /* add in texel */
-	    Q_SHL(lo, 3);
-	    Q_OR32(lo, texel);
-	}
-	Q_SHL(lo, 16);
-	Q_OR32(lo, alpha0 | (alpha1 << 8));
-	((qword *)cc)[0] = lo;
-    }
-
-    color0 = COLOR565(input[minCol]);
-    color1 = COLOR565(input[maxCol]);
-
-#ifdef RADEON /* H.Morii - Workaround for ATI Radeon */
-    if (color0 < color1) {
-	int aux;
-	aux = color0;
-	color0 = color1;
-	color1 = aux;
-	aux = minCol;
-	minCol = maxCol;
-	maxCol = aux;
-    }
-#endif
-
-    cc[2] = color0 | (color1 << 16);
-
-    hihi = 0;
-    if (color0 != color1) {
-	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
-	/* add in texels */
-	for (k = N_TEXELS - 1; k >= 0; k--) {
-	    int texel;
-	    /* interpolate color */
-	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
-	    texel = dxtn_color_tlat[0][texel];
-	    /* add in texel */
-	    hihi <<= 2;
-	    hihi |= texel;
-	}
-    }
-    cc[3] = hihi;
-}
-
-
-#define ENCODER(dxtn, n)						\
-int TAPIENTRY								\
-dxtn##_encode (int width, int height, int comps,			\
-	       const void *source, int srcRowStride,			\
-	       void *dest, int destRowStride)				\
-{									\
-    int x, y;								\
-    const byte *data;							\
-    dword *encoded = (dword *)dest;					\
-    void *newSource = NULL;						\
-									\
-    /* Replicate image if width is not M4 or height is not M4 */	\
-    if ((width & 3) | (height & 3)) {					\
-	int newWidth = (width + 3) & ~3;				\
-	int newHeight = (height + 3) & ~3;				\
-	newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));\
-	_mesa_upscale_teximage2d(width, height, newWidth, newHeight,	\
-                               comps, (const byte *)source,		\
-			       srcRowStride, (byte *)newSource);	\
-	source = newSource;						\
-	width = newWidth;						\
-	height = newHeight;						\
-	srcRowStride = comps * newWidth;				\
-    }									\
-									\
-    data = (const byte *)source;					\
-    destRowStride = (destRowStride - width * n) / 4;			\
-    for (y = 0; y < height; y += 4) {					\
-	unsigned int offs = 0 + (y + 0) * srcRowStride;			\
-	for (x = 0; x < width; x += 4) {				\
-	    const byte *lines[4];					\
-	    lines[0] = &data[offs];					\
-	    lines[1] = lines[0] + srcRowStride;				\
-	    lines[2] = lines[1] + srcRowStride;				\
-	    lines[3] = lines[2] + srcRowStride;				\
-	    offs += 4 * comps;						\
-	    dxtn##_quantize(encoded, lines, comps);			\
-	    /* 4x4 block */						\
-	    encoded += n;						\
-	}								\
-	encoded += destRowStride;					\
-    }									\
-									\
-    if (newSource != NULL) {						\
-	free(newSource);						\
-    }									\
-									\
-    return 0;								\
-}
-
-ENCODER(dxt1_rgb,  2)
-ENCODER(dxt1_rgba, 2)
-ENCODER(dxt3_rgba, 4)
-ENCODER(dxt5_rgba, 4)
-
-
-/***************************************************************************\
- * DXTn decoder
- *
- * The decoder is based on GL_EXT_texture_compression_s3tc
- * specification and serves as a concept for the encoder.
-\***************************************************************************/
-
-
-/* lookup table for scaling 4 bit colors up to 8 bits */
-static const byte _rgb_scale_4[] = {
-    0,   17,  34,  51,  68,  85,  102, 119,
-    136, 153, 170, 187, 204, 221, 238, 255
-};
-
-/* lookup table for scaling 5 bit colors up to 8 bits */
-static const byte _rgb_scale_5[] = {
-    0,   8,   16,  25,  33,  41,  49,  58,
-    66,  74,  82,  90,  99,  107, 115, 123,
-    132, 140, 148, 156, 165, 173, 181, 189,
-    197, 206, 214, 222, 230, 239, 247, 255
-};
-
-/* lookup table for scaling 6 bit colors up to 8 bits */
-static const byte _rgb_scale_6[] = {
-    0,   4,   8,   12,  16,  20,  24,  28,
-    32,  36,  40,  45,  49,  53,  57,  61,
-    65,  69,  73,  77,  81,  85,  89,  93,
-    97,  101, 105, 109, 113, 117, 121, 125,
-    130, 134, 138, 142, 146, 150, 154, 158,
-    162, 166, 170, 174, 178, 182, 186, 190,
-    194, 198, 202, 206, 210, 215, 219, 223,
-    227, 231, 235, 239, 243, 247, 251, 255
-};
-
-
-#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
-#define UP4(c) _rgb_scale_4[(c) & 15]
-#define UP5(c) _rgb_scale_5[(c) & 31]
-#define UP6(c) _rgb_scale_6[(c) & 63]
-#define ZERO_4UBV(v) *((dword *)(v)) = 0
-
-
-void TAPIENTRY
-dxt1_rgb_decode_1 (const void *texture, int stride,
-		   int i, int j, byte *rgba)
-{
-    const byte *src = (const byte *)texture
-		       + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
-    const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
-    if (code == 0) {
-	rgba[RCOMP] = UP5(CC_SEL(src, 11));
-	rgba[GCOMP] = UP6(CC_SEL(src,  5));
-	rgba[BCOMP] = UP5(CC_SEL(src,  0));
-    } else if (code == 1) {
-	rgba[RCOMP] = UP5(CC_SEL(src, 27));
-	rgba[GCOMP] = UP6(CC_SEL(src, 21));
-	rgba[BCOMP] = UP5(CC_SEL(src, 16));
-    } else {
-	const word col0 = src[0] | (src[1] << 8);
-	const word col1 = src[2] | (src[3] << 8);
-	if (col0 > col1) {
-	    if (code == 2) {
-		rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
-		rgba[GCOMP] = (UP6(col0 >>  5) * 2 + UP6(col1 >>  5)) / 3;
-		rgba[BCOMP] = (UP5(col0      ) * 2 + UP5(col1      )) / 3;
-	    } else {
-		rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
-		rgba[GCOMP] = (UP6(col0 >>  5) + 2 * UP6(col1 >>  5)) / 3;
-		rgba[BCOMP] = (UP5(col0      ) + 2 * UP5(col1      )) / 3;
-	    }
-	} else {
-	    if (code == 2) {
-		rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
-		rgba[GCOMP] = (UP6(col0 >>  5) + UP6(col1 >>  5)) / 2;
-		rgba[BCOMP] = (UP5(col0      ) + UP5(col1      )) / 2;
-	    } else {
-		ZERO_4UBV(rgba);
-	    }
-	}
-    }
-    rgba[ACOMP] = 255;
-}
-
-
-void TAPIENTRY
-dxt1_rgba_decode_1 (const void *texture, int stride,
-		    int i, int j, byte *rgba)
-{
-    /* Same as rgb_dxt1 above, except alpha=0 if col0<=col1 and code=3. */
-    const byte *src = (const byte *)texture
-		       + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
-    const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
-    if (code == 0) {
-	rgba[RCOMP] = UP5(CC_SEL(src, 11));
-	rgba[GCOMP] = UP6(CC_SEL(src,  5));
-	rgba[BCOMP] = UP5(CC_SEL(src,  0));
-	rgba[ACOMP] = 255;
-    } else if (code == 1) {
-	rgba[RCOMP] = UP5(CC_SEL(src, 27));
-	rgba[GCOMP] = UP6(CC_SEL(src, 21));
-	rgba[BCOMP] = UP5(CC_SEL(src, 16));
-	rgba[ACOMP] = 255;
-    } else {
-	const word col0 = src[0] | (src[1] << 8);
-	const word col1 = src[2] | (src[3] << 8);
-	if (col0 > col1) {
-	    if (code == 2) {
-		rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
-		rgba[GCOMP] = (UP6(col0 >>  5) * 2 + UP6(col1 >>  5)) / 3;
-		rgba[BCOMP] = (UP5(col0      ) * 2 + UP5(col1      )) / 3;
-	    } else {
-		rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
-		rgba[GCOMP] = (UP6(col0 >>  5) + 2 * UP6(col1 >>  5)) / 3;
-		rgba[BCOMP] = (UP5(col0      ) + 2 * UP5(col1      )) / 3;
-	    }
-	    rgba[ACOMP] = 255;
-	} else {
-	    if (code == 2) {
-		rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
-		rgba[GCOMP] = (UP6(col0 >>  5) + UP6(col1 >>  5)) / 2;
-		rgba[BCOMP] = (UP5(col0      ) + UP5(col1      )) / 2;
-		rgba[ACOMP] = 255;
-	    } else {
-		ZERO_4UBV(rgba);
-	    }
-	}
-    }
-}
-
-
-void TAPIENTRY
-dxt3_rgba_decode_1 (const void *texture, int stride,
-		    int i, int j, byte *rgba)
-{
-    const byte *src = (const byte *)texture
-		       + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
-    const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
-    const dword *cc = (const dword *)(src + 8);
-    if (code == 0) {
-	rgba[RCOMP] = UP5(CC_SEL(cc, 11));
-	rgba[GCOMP] = UP6(CC_SEL(cc,  5));
-	rgba[BCOMP] = UP5(CC_SEL(cc,  0));
-    } else if (code == 1) {
-	rgba[RCOMP] = UP5(CC_SEL(cc, 27));
-	rgba[GCOMP] = UP6(CC_SEL(cc, 21));
-	rgba[BCOMP] = UP5(CC_SEL(cc, 16));
-    } else if (code == 2) {
-	/* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
-	rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
-	rgba[GCOMP] = (UP6(CC_SEL(cc,  5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
-	rgba[BCOMP] = (UP5(CC_SEL(cc,  0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
-    } else {
-	rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
-	rgba[GCOMP] = (UP6(CC_SEL(cc,  5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
-	rgba[BCOMP] = (UP5(CC_SEL(cc,  0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
-    }
-    rgba[ACOMP] = UP4(src[((j & 3) * 4 + (i & 3)) / 2] >> ((i & 1) * 4));
-}
-
-
-void TAPIENTRY
-dxt5_rgba_decode_1 (const void *texture, int stride,
-		    int i, int j, byte *rgba)
-{
-    const byte *src = (const byte *)texture
-		       + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
-    const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
-    const dword *cc = (const dword *)(src + 8);
-    const byte alpha0 = src[0];
-    const byte alpha1 = src[1];
-    const int alphaShift = (((j & 3) * 4) + (i & 3)) * 3 + 16;
-    const int acode = ((alphaShift == 31)
-			? CC_SEL(src + 2, alphaShift - 16)
-			: CC_SEL(src, alphaShift)) & 0x7;
-    if (code == 0) {
-	rgba[RCOMP] = UP5(CC_SEL(cc, 11));
-	rgba[GCOMP] = UP6(CC_SEL(cc,  5));
-	rgba[BCOMP] = UP5(CC_SEL(cc,  0));
-    } else if (code == 1) {
-	rgba[RCOMP] = UP5(CC_SEL(cc, 27));
-	rgba[GCOMP] = UP6(CC_SEL(cc, 21));
-	rgba[BCOMP] = UP5(CC_SEL(cc, 16));
-    } else if (code == 2) {
-	/* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
-	rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
-	rgba[GCOMP] = (UP6(CC_SEL(cc,  5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
-	rgba[BCOMP] = (UP5(CC_SEL(cc,  0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
-    } else {
-	rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
-	rgba[GCOMP] = (UP6(CC_SEL(cc,  5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
-	rgba[BCOMP] = (UP5(CC_SEL(cc,  0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
-    }
-    if (acode == 0) {
-	rgba[ACOMP] = alpha0;
-    } else if (acode == 1) {
-	rgba[ACOMP] = alpha1;
-    } else if (alpha0 > alpha1) {
-	rgba[ACOMP] = ((8 - acode) * alpha0 + (acode - 1) * alpha1) / 7;
-    } else if (acode == 6) {
-	rgba[ACOMP] = 0;
-    } else if (acode == 7) {
-	rgba[ACOMP] = 255;
-    } else {
-	rgba[ACOMP] = ((6 - acode) * alpha0 + (acode - 1) * alpha1) / 5;
-    }
-}

File src/GlideHQ/tc-1.1+/dxtn.h

-/*
- * DXTn codec
- * Version:  1.1
- *
- * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
- *
- * this is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * this is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING.  If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
- */
-
-
-#ifndef DXTN_H_included
-#define DXTN_H_included
-
-TAPI int TAPIENTRY
-dxt1_rgb_encode (int width, int height, int comps,
-		 const void *source, int srcRowStride,
-		 void *dest, int destRowStride);
-
-TAPI int TAPIENTRY
-dxt1_rgba_encode (int width, int height, int comps,
-		  const void *source, int srcRowStride,
-		  void *dest, int destRowStride);
-
-TAPI int TAPIENTRY
-dxt3_rgba_encode (int width, int height, int comps,
-		  const void *source, int srcRowStride,
-		  void *dest, int destRowStride);
-
-TAPI int TAPIENTRY
-dxt5_rgba_encode (int width, int height, int comps,
-		  const void *source, int srcRowStride,
-		  void *dest, int destRowStride);
-
-TAPI void TAPIENTRY
-dxt1_rgb_decode_1 (const void *texture, int stride /* in pixels */,
-		   int i, int j, byte *rgba);
-
-TAPI void TAPIENTRY
-dxt1_rgba_decode_1 (const void *texture, int stride /* in pixels */,
-		    int i, int j, byte *rgba);
-
-TAPI void TAPIENTRY
-dxt3_rgba_decode_1 (const void *texture, int stride /* in pixels */,
-		    int i, int j, byte *rgba);
-
-TAPI void TAPIENTRY
-dxt5_rgba_decode_1 (const void *texture, int stride /* in pixels */,
-		    int i, int j, byte *rgba);
-
-#endif

File src/GlideHQ/tc-1.1+/internal.h

    (DST)[3] = (SRC)[3];             \
 } while (0)
 
-void
-_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
-			  unsigned int outWidth, unsigned int outHeight,
-			  unsigned int comps,
-			  const byte *src, int srcRowStride,
-			  unsigned char *dest);
-
 /** Copy a 4-element unsigned byte vector */
 static inline void
 COPY_4UBV(uint8_t dst[4], const uint8_t src[4])

File src/GlideHQ/tc-1.1+/s2tc/s2tc_algorithm.cpp

+/*
+ * Copyright (C) 2011  Rudolf Polzer   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
+#include "s2tc_license.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <algorithm>
+#include <iostream>
+
+#include "s2tc_algorithm.h"
+#include "s2tc_common.h"
+
+namespace
+{
+	// Trait that exposes the smallest and largest value of a color type as
+	// the static members min_value and max_value. The primary template has
+	// no members; only explicit specializations provide the two constants.
+	template<class T> struct color_type_info
+	{
+	};
+	// Specialization for unsigned char: the range is 0..255.
+	template<> struct color_type_info<unsigned char>
+	{
+		static const unsigned char min_value = 0;
+		static const unsigned char max_value = 255;
+	};
+
+	// Plain aggregate holding one color as three signed char channels.
+	// The limits used for it elsewhere in this file are 0..31 for r and b
+	// and 0..63 for g (see color_type_info<color_t> and operator++/--).
+	struct color_t
+	{
+		signed char r, g, b;
+	};
+	// Returns a color_t with all three channels set to 0.
+	inline color_t make_color_t()
+	{
+		return (color_t) {0, 0, 0};
+	}
+	// Returns a color_t built from the three given channel values, unchanged.
+	inline color_t make_color_t(signed char r_, signed char g_, signed char b_)
+	{
+		return (color_t) {r_, g_, b_};
+	}
+	// Returns a color_t derived from a single integer: r and b receive
+	// i >> 3 and g receives i >> 2.
+	// presumably i is an 8-bit intensity being reduced to 5/6/5 bits -- TODO confirm
+	// NOTE(review): the int expressions are narrowed to signed char inside a
+	// braced initializer of a compound literal; confirm that every compiler
+	// this file targets accepts that form.
+	inline color_t make_color_t(int i)
+	{
+		return (color_t) {i >> 3, i >> 2, i >> 3};
+	}
+	// Two colors are equal when all three channels (r, g and b) are equal.
+	inline bool operator==(const color_t &a, const color_t &b)
+	{
+		return a.r == b.r && a.g == b.g && a.b == b.b;
+	}
+	// Lexicographic ordering of two colors: r is compared first, then g,
+	// then b. The first channel whose difference is non-zero decides the
+	// result; equal colors compare as not-less.
+	inline bool operator<(const color_t &a, const color_t &b)
+	{
+		// The channel difference is stored back into a signed char.
+		// NOTE(review): this relies on the difference fitting in a
+		// signed char, which holds for channels within 0..63 -- confirm
+		// callers never pass values outside that range.
+		signed char d;
+		d = a.r - b.r;
+		if(d)
+			return d < 0;
+		d = a.g - b.g;
+		if(d)
+			return d < 0;
+		d = a.b - b.b;
+		return d < 0;
+	}
+	// Prefix decrement: steps the color down by one, treating (r, g, b) as
+	// a multi-digit counter in which b is the lowest digit, g the middle
+	// digit and r the highest. Returns a reference to the modified color.
+	inline color_t &operator--(color_t &c)
+	{
+		if(c.b > 0)
+		{
+			--c.b;
+		}
+		else if(c.g > 0)
+		{
+			// b is already 0: borrow from g and reset b to its maximum.
+			c.b = 31;
+			--c.g;
+		}
+		else if(c.r > 0)
+		{
+			// b and g are both 0: borrow from r and reset b and g.
+			c.b = 31;
+			c.g = 63;
+			--c.r;
+		}
+		else
+		{
+			// No channel is above 0: wrap around to (31, 63, 31).
+			c.b = 31;
+			c.g = 63;
+			c.r = 31;
+		}
+		return c;
+	}
+	// Prefix increment: steps the color up by one, treating (r, g, b) as a
+	// multi-digit counter in which b is the lowest digit (limit 31), g the
+	// middle digit (limit 63) and r the highest digit (limit 31). Returns a
+	// reference to the modified color.
+	inline color_t &operator++(color_t &c)
+	{
+		if(c.b < 31)
+		{
+			++c.b;
+		}
+		else if(c.g < 63)
+		{
+			// b has reached its limit: carry into g and reset b to 0.
+			c.b = 0;
+			++c.g;
+		}
+		else if(c.r < 31)
+		{
+			// b and g have reached their limits: carry into r.
+			c.b = 0;
+			c.g = 0;
+			++c.r;
+		}
+		else
+		{
+			// Every channel is at its limit: wrap around to (0, 0, 0).
+			c.b = 0;
+			c.g = 0;
+			c.r = 0;
+		}
+		return c;
+	}
+	// Specialization of the range trait for color_t. The constants are
+	// only declared inside the struct and are defined right below it.
+	template<> struct color_type_info<color_t>
+	{
+		static const color_t min_value;
+		static const color_t max_value;
+	};
+	// Lowest color: every channel is 0.
+	const color_t color_type_info<color_t>::min_value = { 0, 0, 0 };
+	// Highest color: r = 31, g = 63, b = 31.
+	const color_t color_type_info<color_t>::max_value = { 31, 63, 31 };
+
+	// Color with int-sized channels. It supports channel-wise accumulation
+	// of color_t values, adding or dividing by an integer, left shifts, and
+	// conversion back to color_t.
+	struct bigcolor_t
+	{
+		int r, g, b;
+
+		// Starts with all channels at 0.
+		inline bigcolor_t(): r(0), g(0), b(0)
+		{
+		}
+
+		// Adds each channel of c to the matching channel of this object.
+		inline bigcolor_t &operator+=(const color_t &c)
+		{
+			r += c.r;
+			g += c.g;
+			b += c.b;
+			return *this;
+		}
+
+		// Adds the same integer v to every channel.
+		inline bigcolor_t &operator+=(int v)
+		{
+			r += v;
+			g += v;
+			b += v;
+			return *this;
+		}
+
+		// Returns a copy with v added to every channel; *this is unchanged.
+		inline bigcolor_t operator+(int v)
+		{
+			bigcolor_t out = *this;
+			out += v;
+			return out;
+		}
+
+		// Divides every channel by v using integer division.
+		// No check for v == 0 is performed here.
+		inline bigcolor_t &operator/=(int v)
+		{
+			r /= v;
+			g /= v;
+			b /= v;
+			return *this;
+		}
+
+		// Returns a copy with every channel divided by v; *this is unchanged.
+		inline bigcolor_t operator/(int v)
+		{
+			bigcolor_t out = *this;
+			out /= v;
+			return out;
+		}
+
+		// Shifts every channel left by v bits.
+		inline bigcolor_t &operator<<=(int v)
+		{
+			r <<= v;
+			g <<= v;
+			b <<= v;
+			return *this;
+		}
+
+		// Returns a copy with every channel shifted left by v bits.
+		inline bigcolor_t operator<<(int v)
+		{
+			bigcolor_t out = *this;
+			out <<= v;
+			return out;
+		}
+
+		// Converts to color_t by keeping only the low 5 bits of r and b
+		// and the low 6 bits of g. Larger values are masked, not clamped.
+		inline operator color_t()
+		{
+			color_t out;
+			out.r = r & 31;
+			out.g = g & 63;
+			out.b = b & 31;
+			return out;
+		}
+	};
+
+	// Debug output: prints the color as a make_color_t(...) call with the
+	// channels shown as integers rather than characters.
+	std::ostream &operator<<(std::ostream &ost, const color_t &c)
+	{
+		ost << "make_color_t(" << int(c.r);
+		ost << ", " << int(c.g);
+		ost << ", " << int(c.b) << ")";
+		return ost;
+	}
+
+	// Debug output: prints the wide color as bigcolor_t(r, g, b).
+	std::ostream &operator<<(std::ostream &ost, const bigcolor_t &c)
+	{
+		ost << "bigcolor_t(" << c.r;
+		ost << ", " << c.g;
+		ost << ", " << c.b << ")";
+		return ost;
+	}
+
+	// 16 differences must fit in int
+	// i.e. a difference must be lower than 2^27
+
+	// shift right, rounded: adds half of 2^n to a before shifting it right by n bits.
+	// Function-like macro: n is expanded twice, so pass side-effect-free
+	// arguments. n must be at least 1, because the rounding term is 1 << (n-1).
+#define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
+
+	// Squared distance with red and blue weighted 4x relative to green.
+	// Green has twice the range of red/blue (63 vs 31), so the 4x factor on
+	// the squares puts the three channels on a common scale.
+	inline int color_dist_avg(const color_t &a, const color_t &b)
+	{
+		const int dr = a.r - b.r;
+		const int dg = a.g - b.g;
+		const int db = a.b - b.b;
+		const int red   = (dr * dr) << 2;
+		const int green = dg * dg;
+		const int blue  = (db * db) << 2;
+		return red + green + blue;
+	}
+
+	// Weighted squared distance: 4x on the red and green squares, 1x on blue.
+	// Because green has twice the range of red/blue, this corresponds to a
+	// 4:16:1 weighting of the normalized channels.
+	inline int color_dist_wavg(const color_t &a, const color_t &b)
+	{
+		const int dr = a.r - b.r;
+		const int dg = a.g - b.g;
+		const int db = a.b - b.b;
+		const int red   = (dr * dr) << 2;
+		const int green = (dg * dg) << 2;
+		const int blue  = db * db;
+		return red + green + blue;
+	}
+
+	// Distance in a luma/chroma space using luma weights 30:59:11.
+	// Red and blue are counted twice in y because their range is half of
+	// green's. The luma term is weighted more heavily than the two chroma terms.
+	inline int color_dist_yuv(const color_t &a, const color_t &b)
+	{
+		const int dr = a.r - b.r;
+		const int dg = a.g - b.g;
+		const int db = a.b - b.b;
+		const int y = dr * 60 + dg * 59 + db * 22; // |y| <= 6259 for 5/6/5-bit channels
+		const int u = dr * 202 - y;
+		const int v = db * 202 - y;
+		const int luma     = (y * y) << 1;
+		const int chroma_u = SHRR(u * u, 3);
+		const int chroma_v = SHRR(v * v, 4);
+		return luma + chroma_u + chroma_v;
+	}
+
+	// Same construction as color_dist_yuv, but with luma weights 21:72:7.
+	// Red and blue are counted twice in y because their range is half of green's.
+	inline int color_dist_rgb(const color_t &a, const color_t &b)
+	{
+		const int dr = a.r - b.r;
+		const int dg = a.g - b.g;
+		const int db = a.b - b.b;
+		const int y = dr * 42 + dg * 72 + db * 14; // |y| <= 6272 for 5/6/5-bit channels
+		const int u = dr * 202 - y;
+		const int v = db * 202 - y;
+		const int luma     = (y * y) << 1;
+		const int chroma_u = SHRR(u * u, 3);
+		const int chroma_v = SHRR(v * v, 4);
+		return luma + chroma_u + chroma_v;
+	}
+
+	// Luma/chroma distance computed on squared channel values (an
+	// approximation of linear light), with luma weights 21:72:7.
+	//
+	// The partial products are formed in 64 bits: for strongly opposed colors
+	// |u| and |v| exceed 600000, so SHRR(u, 3) * SHRR(u, 4) does not fit in a
+	// 32-bit int. Overflowing there is undefined behavior and could produce a
+	// negative "distance". After the final shifts the sum fits in int again.
+	inline int color_dist_srgb(const color_t &a, const color_t &b)
+	{
+		int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
+		int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
+		int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
+		int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
+		int u = dr * 409 - y;
+		int v = db * 409 - y;
+		// widen before multiplying so the products cannot overflow
+		int64_t sy = (int64_t) SHRR(y, 3) * SHRR(y, 4);
+		int64_t su = (int64_t) SHRR(u, 3) * SHRR(u, 4);
+		int64_t sv = (int64_t) SHRR(v, 3) * SHRR(v, 4);
+		return (int) (SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9));
+	}
+
+	// Luminance of a color for color_dist_srgb_mixed: the channels are
+	// squared, combined with weights 21:72:7 (red and blue scaled by 4 to
+	// match green's squared range), scaled by 37, and the square root of
+	// that sum is returned rounded to the nearest integer.
+	inline int srgb_get_y(const color_t &a)
+	{
+		// squared channels
+		const int r2 = a.r * (int) a.r;
+		const int g2 = a.g * (int) a.g;
+		const int b2 = a.b * (int) a.b;
+		// weighted sum; at most 14555800 for 5/6/5-bit channels
+		const int weighted = 37 * (r2 * 84 + g2 * 72 + b2 * 28);
+		// back to a perceptual-ish scale: result is in 0..3815
+		return static_cast<int>(sqrtf(weighted) + 0.5f);
+	}
+
+	// Distance that mixes the luminance from srgb_get_y with chroma terms
+	// built from the raw red and blue channels. Luma differences are
+	// weighted 8x, u differences 1/2, v differences 1/4.
+	inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
+	{
+		const int ya = srgb_get_y(a);
+		const int yb = srgb_get_y(b);
+		// per-color chroma: scaled red/blue minus luminance
+		const int ua = a.r * 191 - ya;
+		const int va = a.b * 191 - ya;
+		const int ub = b.r * 191 - yb;
+		const int vb = b.b * 191 - yb;
+		const int dy = ya - yb;
+		const int du = ua - ub;
+		const int dv = va - vb;
+		return ((dy * dy) << 3) + SHRR(du * du, 1) + SHRR(dv * dv, 2);
+	}
+
+	// Distance for normal maps: both colors are mapped to vectors in
+	// [-1, 1]^3, normalized to unit length (unless they are the zero vector),
+	// and the squared Euclidean distance between them is scaled by 100000
+	// and converted to int. Two unit vectors are at most 2 apart, so the
+	// result does not exceed roughly 4 * 100000.
+	inline int color_dist_normalmap(const color_t &a, const color_t &b)
+	{
+		float ax = a.r / 31.0f * 2 - 1;
+		float ay = a.g / 63.0f * 2 - 1;
+		float az = a.b / 31.0f * 2 - 1;
+		float bx = b.r / 31.0f * 2 - 1;
+		float by = b.g / 63.0f * 2 - 1;
+		float bz = b.b / 31.0f * 2 - 1;
+
+		float len2 = ax * ax + ay * ay + az * az;
+		if(len2 > 0)
+		{
+			const float inv = 1.0f / sqrtf(len2);
+			ax *= inv;
+			ay *= inv;
+			az *= inv;
+		}
+		len2 = bx * bx + by * by + bz * bz;
+		if(len2 > 0)
+		{
+			const float inv = 1.0f / sqrtf(len2);
+			bx *= inv;
+			by *= inv;
+			bz *= inv;
+		}
+
+		const float dx = bx - ax;
+		const float dy = by - ay;
+		const float dz = bz - az;
+		return 100000 * (dx * dx + dy * dy + dz * dz);
+	}
+
+	// Signature shared by the color_dist_* functions above. Used as a
+	// non-type template parameter so the metric is selected at compile time
+	// (see refine_component_encode and the s2tc_dxt1_encode_color_* templates).
+	typedef int ColorDistFunc(const color_t &a, const color_t &b);
+
+	// Squared difference between two alpha values.
+	inline int alpha_dist(unsigned char a, unsigned char b)
+	{
+		const int diff = a - (int) b;
+		return diff * diff;
+	}
+
+	// Chooses the two candidates out of c[0..m) that best represent the
+	// inputs c[0..n) and stores them in c[0] and c[1].
+	// n: input count
+	// m: total color count (including non-counted inputs)
+	// m >= n
+	// "Best" is the pair minimizing the sum, over all n inputs, of the
+	// distance to the nearer of the two candidates. With fewer than two
+	// candidates nothing is changed.
+	template <class T, class F>
+	inline void reduce_colors_inplace(T *c, int n, int m, F dist)
+	{
+		// dists[candidate][input]; variable-length array (compiler extension)
+		int dists[m][n];
+		int cand, input;
+
+		// candidates that are inputs themselves: symmetric, zero diagonal
+		for(cand = 0; cand < n; ++cand)
+		{
+			dists[cand][cand] = 0;
+			for(input = cand + 1; input < n; ++input)
+			{
+				const int d = dist(c[cand], c[input]);
+				dists[input][cand] = d;
+				dists[cand][input] = d;
+			}
+		}
+		// remaining (non-counted) candidates against every input
+		for(; cand < m; ++cand)
+			for(input = 0; input < n; ++input)
+				dists[cand][input] = dist(c[cand], c[input]);
+
+		// exhaustive search over all candidate pairs
+		int bestsum = -1;
+		int besti = 0;
+		int bestj = 1;
+		for(int first = 0; first < m; ++first)
+		{
+			for(int second = first + 1; second < m; ++second)
+			{
+				int total = 0;
+				for(input = 0; input < n; ++input)
+					total += min(dists[first][input], dists[second][input]);
+				if(bestsum < 0 || total < bestsum)
+				{
+					bestsum = total;
+					besti = first;
+					bestj = second;
+				}
+			}
+		}
+
+		// besti < bestj, so c[besti] is read before c[1] may be overwritten
+		if(besti != 0)
+			c[0] = c[besti];
+		if(bestj != 1)
+			c[1] = c[bestj];
+	}
+	// Variant of reduce_colors_inplace for encodings that always offer two
+	// extra fixed values (fix0, fix1) besides the two chosen candidates:
+	// each input is scored by its distance to the nearest of the two
+	// candidates and the two fixed values. The winning pair is stored in
+	// c[0] and c[1]; the fixed values themselves are never selected.
+	template <class T, class F>
+	inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
+	{
+		// TODO fix this for ramp encoding!
+		// rows 0..m-1: candidates, row m: fix0, row m+1: fix1
+		// (variable-length array, compiler extension)
+		int dists[m+2][n];
+		int cand, input;
+
+		// candidates that are inputs themselves: symmetric, zero diagonal
+		for(cand = 0; cand < n; ++cand)
+		{
+			dists[cand][cand] = 0;
+			for(input = cand + 1; input < n; ++input)
+			{
+				const int d = dist(c[cand], c[input]);
+				dists[input][cand] = d;
+				dists[cand][input] = d;
+			}
+		}
+		// remaining (non-counted) candidates against every input
+		for(; cand < m; ++cand)
+			for(input = 0; input < n; ++input)
+				dists[cand][input] = dist(c[cand], c[input]);
+		// the two fixed values against every input
+		for(input = 0; input < n; ++input)
+			dists[m][input] = dist(fix0, c[input]);
+		for(input = 0; input < n; ++input)
+			dists[m+1][input] = dist(fix1, c[input]);
+
+		// exhaustive search over all candidate pairs
+		int bestsum = -1;
+		int besti = 0;
+		int bestj = 1;
+		for(int first = 0; first < m; ++first)
+		{
+			for(int second = first + 1; second < m; ++second)
+			{
+				int total = 0;
+				for(input = 0; input < n; ++input)
+				{
+					const int nearest_cand = min(dists[first][input], dists[second][input]);
+					const int nearest_fix = min(dists[m][input], dists[m+1][input]);
+					total += min(nearest_cand, nearest_fix);
+				}
+				if(bestsum < 0 || total < bestsum)
+				{
+					bestsum = total;
+					besti = first;
+					bestj = second;
+				}
+			}
+		}
+
+		// besti < bestj, so c[besti] is read before c[1] may be overwritten
+		if(besti != 0)
+			c[0] = c[besti];
+		if(bestj != 1)
+			c[1] = c[bestj];
+	}
+
+	// Strategy s2tc_encode_block uses to pick the two initial endpoint colors.
+	enum CompressionMode
+	{
+		// choose the endpoints with reduce_colors_inplace
+		MODE_NORMAL,
+		// just take the darkest and brightest colors of the block
+		MODE_FAST
+	};
+
+	// Maps a channel value into the space in which the given distance
+	// metric operates. Default: the value is used unchanged.
+	template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
+	{
+		return comp;
+	}
+	// color_dist_srgb works on squared channel values.
+	template<> inline int refine_component_encode<color_dist_srgb>(int comp)
+	{
+		const int squared = comp * comp;
+		return squared;
+	}
+	// color_dist_srgb_mixed works on squared channel values as well.
+	template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
+	{
+		const int squared = comp * comp;
+		return squared;
+	}
+
+	// Inverse of refine_component_encode. Default: the value is used unchanged.
+	template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
+	{
+		return comp;
+	}
+	// color_dist_srgb: undo the squaring, rounding to the nearest integer.
+	template<> inline int refine_component_decode<color_dist_srgb>(int comp)
+	{
+		return static_cast<int>(sqrtf(comp) + 0.5f);
+	}
+	// color_dist_srgb_mixed: undo the squaring, rounding to the nearest integer.
+	template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
+	{
+		return static_cast<int>(sqrtf(comp) + 0.5f);
+	}
+
+	template <class T, class Big, int scale_l>
+	struct s2tc_evaluate_colors_result_t;
+
+	// Accumulator used while refining the two endpoints of a block: it
+	// collects the pixels assigned to endpoint 0 and to endpoint 1 and can
+	// afterwards replace each endpoint by the rounded mean of its pixels.
+	//
+	// Requirements on the types:
+	//   Big << int
+	//   Big / int
+	//   Big + int
+	//   Big += T
+	template <class T, class Big>
+	struct s2tc_evaluate_colors_result_t<T, Big, 1>
+	{
+		int n0, n1; // number of pixels recorded per endpoint
+		Big S0, S1; // sum of the pixel values recorded per endpoint
+
+		inline s2tc_evaluate_colors_result_t():
+			n0(), n1(), S0(), S1()
+		{
+		}
+
+		// Records pixel value a under endpoint 0 when l is zero, under
+		// endpoint 1 for any other l.
+		inline void add(int l, T a)
+		{
+			if(!l)
+			{
+				++n0;
+				S0 += a;
+				return;
+			}
+			++n1;
+			S1 += a;
+		}
+
+		// Writes the rounded mean of each non-empty group into a / b; an
+		// endpoint without pixels is left untouched. Returns false (and
+		// changes nothing) when no pixel was recorded at all.
+		inline bool evaluate(T &a, T &b)
+		{
+			if(n0 == 0 && n1 == 0)
+				return false;
+			// (2*S + n) / (2*n) == S/n rounded to nearest
+			if(n0 != 0)
+				a = ((S0 << 1) + n0) / (n0 << 1);
+			if(n1 != 0)
+				b = ((S1 << 1) + n1) / (n1 << 1);
+			return true;
+		}
+	};
+
+	// Primary template, used for every scale_l other than 1. It is an
+	// intentionally empty placeholder: it offers neither add() nor
+	// evaluate(), so only the scale_l == 1 specialization is usable.
+	template <class T, class Big, int scale_l>
+	struct s2tc_evaluate_colors_result_t
+	{
+		// a possible implementation of inferred color/alpha values
+		// refining would go here
+	};
+
+	// Drop-in replacement for s2tc_evaluate_colors_result_t when no
+	// refinement is wanted: add() accepts the same arguments and ignores them.
+	template <class T>
+	struct s2tc_evaluate_colors_result_null_t
+	{
+		inline void add(int, T)
+		{
+			// nothing to record
+		}
+	};
+
+	// Reads one value of type T from a 4-byte pixel.
+	// Generic case: bytes 0, 1, 2 become the r, g, b members of T.
+	template<class T> T get(const unsigned char *buf)
+	{
+		T color;
+		color.r = buf[0];
+		color.g = buf[1];
+		color.b = buf[2];
+		return color;
+	}
+	// unsigned char case: byte 3, the pixel's alpha value.
+	template<> unsigned char get<unsigned char>(const unsigned char *buf)
+	{
+		const unsigned char alpha = buf[3];
+		return alpha;
+	}
+
+	template<class T, class Big, int bpp, bool have_trans, bool have_0_255, int n_input, class Dist, class Eval, class Arr>
+	inline unsigned int s2tc_try_encode_block(
+			Arr &out,
+			Eval &res,
+			Dist ColorDist,
+			const unsigned char *in, int iw, int w, int h,
+			const T colors_ref[])
+	{
+		unsigned int score = 0;
+		for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
+		{
+			int i = y * 4 + x;
+			const unsigned char *pix = &in[(y * iw + x) * 4];
+
+			if(have_trans)
+			{
+				if(pix[3] == 0)
+				{
+					out.do_or(i, (1 << bpp) - 1);
+					continue;
+				}
+			}
+
+			T color(get<T>(pix));
+			int best = 0;
+			int bestdist = ColorDist(color, colors_ref[0]);
+			for(int k = 1; k < n_input; ++k)
+			{
+				int dist = ColorDist(color, colors_ref[k]);
+				if(dist < bestdist)
+				{
+					bestdist = dist;
+					best = k;
+				}
+			}
+			if(have_0_255)
+			{
+				int dist_0 = ColorDist(color, color_type_info<T>::min_value);
+				if(dist_0 <= bestdist)
+				{
+					bestdist = dist_0;
+					out.do_or(i, (1 << bpp) - 2);
+					score += bestdist;
+					continue;
+				}
+				int dist_255 = ColorDist(color, color_type_info<T>::max_value);
+				if(dist_255 <= bestdist)
+				{
+					bestdist = dist_255;
+					out.do_or(i, (1 << bpp) - 1);
+					score += bestdist;
+					continue;
+				}
+			}
+
+			// record
+			res.add(best, color);
+			out.do_or(i, best);
+			score += bestdist;
+		}
+		return score;
+	}
+
+	// REFINE_LOOP: refine, take result over only if score improved, loop until it did not
+	//
+	// Encodes the alpha channel of a block with endpoints a0/a1 (plus the
+	// implicit 0 and 255), then repeatedly replaces the endpoints by the mean
+	// of the pixels assigned to them for as long as that lowers the error.
+	// On return a0 < a1 holds and out contains the matching indices.
+	inline void s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
+	{
+		bitarray<uint64_t, 16, 3> trial;
+		unsigned char a0next = a0, a1next = a1;
+		unsigned int bestscore = 0x7FFFFFFF;
+		for(;;)
+		{
+			unsigned char ramp[2] = { a0next, a1next };
+			s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
+			const unsigned int trialscore = s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(trial, r2, alpha_dist, in, iw, w, h, ramp);
+			if(trialscore >= bestscore)
+				break; // no improvement: keep the previous result
+			out = trial;
+			bestscore = trialscore;
+			a0 = a0next;
+			a1 = a1next;
+			if(!r2.evaluate(a0next, a1next))
+				break; // no pixel used either endpoint, nothing to refine
+			trial.clear();
+		}
+
+		// identical endpoints: nudge a1 and point everything at a0
+		if(a1 == a0)
+		{
+			if(a0 == 255)
+				--a1;
+			else
+				++a1;
+			for(int i = 0; i < 16; ++i)
+				if(out.get(i) == 1)
+					out.set(i, 0);
+		}
+
+		// wrong order: swap the endpoints and mirror the indices
+		// (6 and 7, the implicit values, stay as they are)
+		if(a1 < a0)
+		{
+			std::swap(a0, a1);
+			for(int i = 0; i < 16; ++i)
+			{
+				const int idx = out.get(i);
+				if(idx == 0)
+					out.set(i, 1);
+				else if(idx == 1)
+					out.set(i, 0);
+				else if(idx != 6 && idx != 7)
+					out.set(i, 7 - out.get(i));
+			}
+		}
+	}
+
+	// REFINE_ALWAYS: refine, do not check
+	//
+	// Encodes the alpha channel of a block with endpoints a0/a1 (plus the
+	// implicit 0 and 255), then unconditionally replaces each endpoint by the
+	// mean of the pixels assigned to it. The indices in out are those of the
+	// encoding pass; they are only adjusted for degenerate or misordered
+	// endpoints. On return a0 < a1 holds.
+	inline void s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
+	{
+		unsigned char ramp[2] = {
+			a0,
+			a1
+		};
+		s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
+		// n_input must equal the number of ramp entries (2); a larger value
+		// makes the encoder read past the end of ramp.
+		s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
+		r2.evaluate(a0, a1);
+
+		// identical endpoints: nudge a1 and point everything at a0
+		if(a1 == a0)
+		{
+			if(a0 == 255)
+				--a1;
+			else
+				++a1;
+			for(int i = 0; i < 16; ++i) switch(out.get(i))
+			{
+				case 1:
+					out.set(i, 0);
+					break;
+			}
+		}
+
+		// wrong order: swap the endpoints and mirror the indices
+		// (6 and 7, the implicit values, stay as they are)
+		if(a1 < a0)
+		{
+			std::swap(a0, a1);
+			for(int i = 0; i < 16; ++i) switch(out.get(i))
+			{
+				case 0:
+					out.set(i, 1);
+					break;
+				case 1:
+					out.set(i, 0);
+					break;
+				case 6:
+				case 7:
+					break;
+				default:
+					out.set(i, 7 - out.get(i));
+					break;
+			}
+		}
+	}
+
+	// REFINE_NEVER: do not refine
+	//
+	// Orders the endpoints so that a0 <= a1 and encodes the alpha channel of
+	// the block against them (plus the implicit 0 and 255) in a single pass.
+	inline void s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
+	{
+		if(a0 > a1)
+			std::swap(a0, a1);
+		// six slots are handed to the encoder; the four after a0/a1 are zero-initialized
+		unsigned char ramp[6] = { a0, a1 };
+		s2tc_evaluate_colors_result_null_t<unsigned char> ignored;
+		s2tc_try_encode_block<unsigned char, int, 3, false, true, 6>(out, ignored, alpha_dist, in, iw, w, h, ramp);
+	}
+
+	// REFINE_LOOP: refine, take result over only if score improved, loop until it did not
+	//
+	// Encodes the colors of a block with endpoints c0/c1, then repeatedly
+	// replaces the endpoints by the mean of the pixels assigned to them for
+	// as long as that lowers the error. With have_trans, pixels whose alpha
+	// is 0 are stored as index 3.
+	// On return the endpoints differ and are ordered c0 < c1 when have_trans
+	// is set, c0 > c1 otherwise; out holds the matching indices.
+	template<ColorDistFunc ColorDist, bool have_trans>
+	inline void s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
+	{
+		bitarray<uint32_t, 16, 2> out2;
+		color_t c0next = c0, c1next = c1;
+		unsigned int s = 0x7FFFFFFF;
+		for(;;)
+		{
+			color_t ramp[2] = {
+				c0next,
+				c1next
+			};
+			s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
+			unsigned int s2 = s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out2, r2, ColorDist, in, iw, w, h, ramp);
+			if(s2 < s)
+			{
+				out = out2;
+				s = s2;
+				c0 = c0next;
+				c1 = c1next;
+				if(!r2.evaluate(c0next, c1next))
+					break; // no pixel used either endpoint, nothing to refine
+			}
+			else
+				break; // no improvement: keep the previous result
+			out2.clear();
+		}
+
+		// identical endpoints: nudge c1 and redirect the pixels that pointed
+		// at it to c0. Only index 1 is rewritten, so index 3 (transparent
+		// pixels when have_trans is set) is preserved.
+		if(c0 == c1)
+		{
+			if(c0 == color_type_info<color_t>::max_value)
+				--c1;
+			else
+				++c1;
+			for(int i = 0; i < 16; ++i)
+				if(out.get(i) == 1)
+					out.set(i, 0);
+		}
+
+		// wrong order for the selected mode: swap the endpoints and flip
+		// indices 0 <-> 1; indices 2 and 3 are left alone
+		if(have_trans ? c1 < c0 : c0 < c1)
+		{
+			std::swap(c0, c1);
+			for(int i = 0; i < 16; ++i)
+				if(!(out.get(i) & 2))
+					out.do_xor(i, 1);
+		}
+	}
+
+	// REFINE_ALWAYS: refine, do not check
+	//
+	// Encodes the colors of a block with endpoints c0/c1, then
+	// unconditionally replaces each endpoint by the mean of the pixels
+	// assigned to it. With have_trans, pixels whose alpha is 0 are stored as
+	// index 3.
+	// On return the endpoints differ and are ordered c0 < c1 when have_trans
+	// is set, c0 > c1 otherwise; out holds the matching indices.
+	template<ColorDistFunc ColorDist, bool have_trans>
+	inline void s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
+	{
+		color_t ramp[2] = {
+			c0,
+			c1
+		};
+		s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
+		s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
+		r2.evaluate(c0, c1);
+
+		// identical endpoints: nudge c1 and redirect the pixels that pointed
+		// at it to c0. Only index 1 is rewritten, so index 3 (transparent
+		// pixels when have_trans is set) is preserved.
+		if(c0 == c1)
+		{
+			if(c0 == color_type_info<color_t>::max_value)
+				--c1;
+			else
+				++c1;
+			for(int i = 0; i < 16; ++i)
+				if(out.get(i) == 1)
+					out.set(i, 0);
+		}
+
+		// wrong order for the selected mode: swap the endpoints and flip
+		// indices 0 <-> 1; indices 2 and 3 are left alone
+		if(have_trans ? c1 < c0 : c0 < c1)
+		{
+			std::swap(c0, c1);
+			for(int i = 0; i < 16; ++i)
+				if(!(out.get(i) & 2))
+					out.do_xor(i, 1);
+		}
+	}
+
+	// REFINE_NEVER: do not refine
+	//
+	// Puts the endpoints into the order required by the selected mode
+	// (c0 <= c1 with have_trans, c0 >= c1 without) and encodes the colors of
+	// the block against them in a single pass.
+	template<ColorDistFunc ColorDist, bool have_trans>
+	inline void s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
+	{
+		const bool wrong_order = have_trans ? (c1 < c0) : (c0 < c1);
+		if(wrong_order)
+			std::swap(c0, c1);
+		color_t ramp[2] = { c0, c1 };
+		s2tc_evaluate_colors_result_null_t<color_t> ignored;
+		s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, ignored, ColorDist, in, iw, w, h, ramp);
+	}
+
+	// Stores the alpha byte of every pixel of the w x h block directly into
+	// the 4-bit-per-pixel alpha block, at position y*4+x. Values are OR-ed
+	// in, so out has to be clear on entry.
+	// NOTE(review): presumably the alpha bytes were already reduced to 4 bits
+	// by the caller - confirm.
+	inline void s2tc_dxt3_encode_alpha(bitarray<uint64_t, 16, 4> &out, const unsigned char *in, int iw, int w, int h)
+	{
+		for(int x = 0; x < w; ++x)
+		{
+			for(int y = 0; y < h; ++y)
+			{
+				const unsigned char *pixel = &in[(y * iw + x) * 4];
+				const int slot = y * 4 + x;
+				out.do_or(slot, pixel[3]);
+			}
+		}
+	}
+
+	template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
+	inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
+	{
+		color_t c[16 + (nrandom >= 0 ? nrandom : 0)];
+		unsigned char ca[16 + (nrandom >= 0 ? nrandom : 0)];
+		int x, y;
+
+		if(mode == MODE_FAST)
+		{
+			// FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
+
+			color_t c0 = make_color_t(0, 0, 0);
+
+			// dummy values because we don't know whether the first pixel will write
+			c[0].r = 31;
+			c[0].g = 63;
+			c[0].b = 31;
+			c[1].r = 0;
+			c[1].g = 0;
+			c[1].b = 0;
+			int dmin = 0x7FFFFFFF;
+			int dmax = 0;
+			if(dxt == DXT5)
+			{
+				ca[0] = rgba[3];
+				ca[1] = ca[0];
+			}
+
+			for(x = 0; x < w; ++x)
+				for(y = 0; y < h; ++y)
+				{
+					c[2].r = rgba[(x + y * iw) * 4 + 0];
+					c[2].g = rgba[(x + y * iw) * 4 + 1];
+					c[2].b = rgba[(x + y * iw) * 4 + 2];
+					ca[2]  = rgba[(x + y * iw) * 4 + 3];
+					// MODE_FAST doesn't work for normalmaps, so this works
+					if(!ca[2])
+						continue;
+
+					int d = ColorDist(c[2], c0);
+					if(d > dmax)
+					{
+						dmax = d;
+						c[1] = c[2];
+					}
+					if(d < dmin)
+					{
+						dmin = d;
+						c[0] = c[2];
+					}
+
+					if(dxt == DXT5)
+					{
+						if(ca[2] != 255)
+						{
+							if(ca[2] > ca[1])
+								ca[1] = ca[2];
+							if(ca[2] < ca[0])
+								ca[0] = ca[2];
+						}
+					}
+				}
+		}
+		else
+		{
+			int n = 0, m = 0;
+
+			for(x = 0; x < w; ++x)
+				for(y = 0; y < h; ++y)
+				{
+					c[n].r = rgba[(x + y * iw) * 4 + 0];
+					c[n].g = rgba[(x + y * iw) * 4 + 1];
+					c[n].b = rgba[(x + y * iw) * 4 + 2];
+					ca[n]  = rgba[(x + y * iw) * 4 + 3];
+					++n;
+				}
+			if(n == 0)
+			{
+				n = 1;
+				c[0].r = 0;
+				c[0].g = 0;
+				c[0].b = 0;
+				ca[0] = 0;
+			}
+			m = n;
+
+			if(nrandom > 0)
+			{
+				color_t mins = c[0];
+				color_t maxs = c[0];
+				unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
+				unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
+				for(x = 1; x < n; ++x)
+				{
+					mins.r = min(mins.r, c[x].r);
+					mins.g = min(mins.g, c[x].g);
+					mins.b = min(mins.b, c[x].b);
+					maxs.r = max(maxs.r, c[x].r);
+					maxs.g = max(maxs.g, c[x].g);
+					maxs.b = max(maxs.b, c[x].b);
+					if(dxt == DXT5)
+					{
+						mina = min(mina, ca[x]);
+						maxa = max(maxa, ca[x]);
+					}
+				}
+				color_t len = make_color_t(maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1);
+				int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
+				for(x = 0; x < nrandom; ++x)
+				{
+					c[m].r = mins.r + rand() % len.r;
+					c[m].g = mins.g + rand() % len.g;
+					c[m].b = mins.b + rand() % len.b;
+					if(dxt == DXT5)
+						ca[m] = mina + rand() % lena;
+					++m;
+				}
+			}
+			else
+			{
+				// hack for last miplevel
+				if(n == 1)
+				{
+					c[1] = c[0];
+					m = n = 2;
+				}
+			}
+
+			reduce_colors_inplace(c, n, m, ColorDist);
+			if(dxt == DXT5)
+				reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
+		}
+
+		// equal colors are BAD
+		if(c[0] == c[1])
+		{
+			if(c[0] == color_type_info<color_t>::max_value)
+				--c[1];
+			else
+				++c[1];
+		}
+
+		if(dxt == DXT5)
+		{
+			if(ca[0] == ca[1])
+			{
+				if(ca[0] == 255)
+					--ca[1];
+				else
+					++ca[1];
+			}
+		}
+
+		switch(dxt)
+		{
+			case DXT1:
+				{
+					bitarray<uint32_t, 16, 2> colorblock;
+					switch(refine)
+					{
+						case REFINE_NEVER:
+							s2tc_dxt1_encode_color_refine_never<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
+							break;
+						case REFINE_ALWAYS:
+							s2tc_dxt1_encode_color_refine_always<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
+							break;
+						case REFINE_LOOP:
+							s2tc_dxt1_encode_color_refine_loop<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
+							break;
+					}
+					out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
+					out[1] = (c[0].r << 3) | (c[0].g >> 3);
+					out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
+					out[3] = (c[1].r << 3) | (c[1].g >> 3);
+					colorblock.tobytes(&out[4]);
+				}
+				break;
+			case DXT3:
+				{
+					bitarray<uint32_t, 16, 2> colorblock;
+					bitarray<uint64_t, 16, 4> alphablock;