Commits

ecsv committed b663cc8

Reduce GCC specific code

  • Participants
  • Parent commits dbf4af6

Comments (0)

Files changed (7)

src/Glide64/TexCache.cpp

 #include "Gfx #1.3.h"
 #include "TexCache.h"
 #include "Combine.h"
+#include "Util.h"
 
 void LoadTex (int id, int tmu);
 

src/Glide64/TexLoad16b.h

     v9 = wid_64;
     do
     {
-      v10 = __builtin_bswap32(*v6);
-      v11 = __builtin_bswap32(v6[1]);
+      v10 = bswap32(*v6);
+      v11 = bswap32(v6[1]);
       ALOWORD(v10) = __ROR__((uint16_t)v10, 1);
       ALOWORD(v11) = __ROR__((uint16_t)v11, 1);
       v10 = __ROR__(v10, 16);
     v14 = wid_64;
     do
     {
-      v15 = __builtin_bswap32(v12[1]);
-      v16 = __builtin_bswap32(*v12);
+      v15 = bswap32(v12[1]);
+      v16 = bswap32(*v12);
       ALOWORD(v15) = __ROR__((uint16_t)v15, 1);
       ALOWORD(v16) = __ROR__((uint16_t)v16, 1);
       v15 = __ROR__(v15, 16);

src/Glide64/TexLoad4b.h

 extern "C" void asmLoad4bIA(uint8_t *src, uint8_t *dst, int wid_64, int height, int line, int ext);
 extern "C" void asmLoad4bI(uint8_t *src, uint8_t *dst, int wid_64, int height, int line, int ext);
 
-#define ALOWORD(x)   (*((uint16_t*)&(x)))   // low word
-
-template<class T> static inline T __ROR__(T value, unsigned int count)
-{
-  const unsigned int nbits = sizeof(T) * 8;
-  count %= nbits;
-
-  T low = value << (nbits - count);
-  value >>= count;
-  value |= low;
-  return value;
-}
-
 static inline void load4bCI(uint8_t *src, uint8_t *dst, int wid_64, int height, uint16_t line, int ext, uint16_t *pal)
 {
   uint8_t *v7;
     do
     {
       v11 = v10;
-      v12 = __builtin_bswap32(*(uint32_t *)v7);
+      v12 = bswap32(*(uint32_t *)v7);
       v13 = v7 + 4;
       ALOWORD(v10) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 23) & 0x1E)), 1);
       v14 = v10 << 16;
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 3) & 0x1E)), 1);
       *v15 = v14;
       ++v15;
-      v16 = __builtin_bswap32(*(uint32_t *)v13);
+      v16 = bswap32(*(uint32_t *)v13);
       v7 = v13 + 4;
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 23) & 0x1E)), 1);
       v14 <<= 16;
     do
     {
       v20 = v19;
-      v21 = __builtin_bswap32(*((uint32_t *)v17 + 1));
+      v21 = bswap32(*((uint32_t *)v17 + 1));
       ALOWORD(v19) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 23) & 0x1E)), 1);
       v22 = v19 << 16;
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 27) & 0x1E)), 1);
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 3) & 0x1E)), 1);
       *v23 = v22;
       ++v23;
-      v24 = __builtin_bswap32(*(uint32_t *)v17);
+      v24 = bswap32(*(uint32_t *)v17);
       v17 = &src[((uintptr_t)v17 + 8 - (uintptr_t)src) & 0x7FF];
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 23) & 0x1E)), 1);
       v22 <<= 16;
     do
     {
       v11 = v10;
-      v12 = __builtin_bswap32(*(uint32_t *)v7);
+      v12 = bswap32(*(uint32_t *)v7);
       v13 = (uint32_t *)(v7 + 4);
       ALOWORD(v10) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 23) & 0x1E)), 8);
       v14 = v10 << 16;
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 3) & 0x1E)), 8);
       *v15 = v14;
       ++v15;
-      v16 = __builtin_bswap32(*v13);
+      v16 = bswap32(*v13);
       v7 = (uint8_t *)(v13 + 1);
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 23) & 0x1E)), 8);
       v14 <<= 16;
     do
     {
       v20 = v19;
-      v21 = __builtin_bswap32(*((uint32_t *)v17 + 1));
+      v21 = bswap32(*((uint32_t *)v17 + 1));
       ALOWORD(v19) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 23) & 0x1E)), 8);
       v22 = v19 << 16;
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 27) & 0x1E)), 8);
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 3) & 0x1E)), 8);
       *v23 = v22;
       ++v23;
-      v24 = __builtin_bswap32(*(uint32_t *)v17);
+      v24 = bswap32(*(uint32_t *)v17);
       v17 = &src[((uintptr_t)v17 + 8 - (uintptr_t)src) & 0x7FF];
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 23) & 0x1E)), 8);
       v22 <<= 16;
     do
     {
       v10 = v9;
-      v11 = __builtin_bswap32(*v6);
+      v11 = bswap32(*v6);
       v12 = v6 + 1;
       v13 = v11;
       v14 = 8 * (v11 & 0x100000) | 4 * (v11 & 0x100000) | 2 * (v11 & 0x100000) | v11 & 0x100000 | (((v11 >> 16) & 0xE00) >> 3) & 0x100 | (v11 >> 16) & 0xE00 | 8 * ((v11 >> 12) & 0x1000) | 4 * ((v11 >> 12) & 0x1000) | 2 * ((v11 >> 12) & 0x1000) | (v11 >> 12) & 0x1000 | (((v11 >> 28) & 0xE) >> 3) | (v11 >> 28) & 0xE | 8 * ((v11 >> 24) & 0x10) | 4 * ((v11 >> 24) & 0x10) | 2 * ((v11 >> 24) & 0x10) | (v11 >> 24) & 0x10;
       v21 >>= 3;
       *v16 = (((v13 << 24) & 0xE000000) >> 3) & 0x1000000 | (v13 << 24) & 0xE000000 | 8 * ((v13 << 28) & 0x10000000) | 4 * ((v13 << 28) & 0x10000000) | 2 * ((v13 << 28) & 0x10000000) | (v13 << 28) & 0x10000000 | v21 & 0x10000 | v22;
       ++v16;
-      v23 = __builtin_bswap32(*v12);
+      v23 = bswap32(*v12);
       v6 = v12 + 1;
       v24 = v23;
       v25 = 8 * (v23 & 0x100000) | 4 * (v23 & 0x100000) | 2 * (v23 & 0x100000) | v23 & 0x100000 | (((v23 >> 16) & 0xE00) >> 3) & 0x100 | (v23 >> 16) & 0xE00 | 8 * ((v23 >> 12) & 0x1000) | 4 * ((v23 >> 12) & 0x1000) | 2 * ((v23 >> 12) & 0x1000) | (v23 >> 12) & 0x1000 | (((v23 >> 28) & 0xE) >> 3) | (v23 >> 28) & 0xE | 8 * ((v23 >> 24) & 0x10) | 4 * ((v23 >> 24) & 0x10) | 2 * ((v23 >> 24) & 0x10) | (v23 >> 24) & 0x10;
     do
     {
       v36 = v35;
-      v37 = __builtin_bswap32(v33[1]);
+      v37 = bswap32(v33[1]);
       v38 = v37 >> 4;
       v38 &= 0xE0000u;
       v39 = v38 | 8 * (v37 & 0x100000) | 4 * (v37 & 0x100000) | 2 * (v37 & 0x100000) | v37 & 0x100000 | (((v37 >> 16) & 0xE00) >> 3) & 0x100 | (v37 >> 16) & 0xE00 | 8 * ((v37 >> 12) & 0x1000) | 4 * ((v37 >> 12) & 0x1000) | 2 * ((v37 >> 12) & 0x1000) | (v37 >> 12) & 0x1000 | (((v37 >> 28) & 0xE) >> 3) | (v37 >> 28) & 0xE | 8 * ((v37 >> 24) & 0x10) | 4 * ((v37 >> 24) & 0x10) | 2 * ((v37 >> 24) & 0x10) | (v37 >> 24) & 0x10;
       v45 >>= 3;
       *v40 = (((v37 << 24) & 0xE000000) >> 3) & 0x1000000 | (v37 << 24) & 0xE000000 | 8 * ((v37 << 28) & 0x10000000) | 4 * ((v37 << 28) & 0x10000000) | 2 * ((v37 << 28) & 0x10000000) | (v37 << 28) & 0x10000000 | v45 & 0x10000 | v46;
       ++v40;
-      v47 = __builtin_bswap32(*v33);
+      v47 = bswap32(*v33);
       v33 += 2;
       v48 = v47;
       v49 = 8 * (v47 & 0x100000) | 4 * (v47 & 0x100000) | 2 * (v47 & 0x100000) | v47 & 0x100000 | (((v47 >> 16) & 0xE00) >> 3) & 0x100 | (v47 >> 16) & 0xE00 | 8 * ((v47 >> 12) & 0x1000) | 4 * ((v47 >> 12) & 0x1000) | 2 * ((v47 >> 12) & 0x1000) | (v47 >> 12) & 0x1000 | (((v47 >> 28) & 0xE) >> 3) | (v47 >> 28) & 0xE | 8 * ((v47 >> 24) & 0x10) | 4 * ((v47 >> 24) & 0x10) | 2 * ((v47 >> 24) & 0x10) | (v47 >> 24) & 0x10;
     do
     {
       v10 = v9;
-      v11 = __builtin_bswap32(*v6);
+      v11 = bswap32(*v6);
       v12 = v6 + 1;
       v13 = v11;
       v14 = 16 * ((v11 >> 16) & 0xF00) | (v11 >> 16) & 0xF00 | 16 * (v11 >> 28) | (v11 >> 28);
       v16 = v13 << 12;
       *v15 = 16 * ((v13 << 24) & 0xF000000) | (v13 << 24) & 0xF000000 | 16 * (v16 & 0xF0000) | v16 & 0xF0000 | 16 * (v13 & 0xF00) | v13 & 0xF00 | 16 * ((uint16_t)v13 >> 12) | ((uint16_t)v13 >> 12);
       ++v15;
-      v17 = __builtin_bswap32(*v12);
+      v17 = bswap32(*v12);
       v6 = v12 + 1;
       v18 = v17;
       v19 = 16 * ((v17 >> 16) & 0xF00) | (v17 >> 16) & 0xF00 | 16 * (v17 >> 28) | (v17 >> 28);
     do
     {
       v24 = v23;
-      v25 = __builtin_bswap32(v21[1]);
+      v25 = bswap32(v21[1]);
       v26 = v25 >> 4;
       *v22 = 16 * ((v25 << 8) & 0xF000000) | (v25 << 8) & 0xF000000 | 16 * (v26 & 0xF0000) | v26 & 0xF0000 | 16 * ((v25 >> 16) & 0xF00) | (v25 >> 16) & 0xF00 | 16 * (v25 >> 28) | (v25 >> 28);
       v27 = v22 + 1;
       v28 = v25 << 12;
       *v27 = 16 * ((v25 << 24) & 0xF000000) | (v25 << 24) & 0xF000000 | 16 * (v28 & 0xF0000) | v28 & 0xF0000 | 16 * (v25 & 0xF00) | v25 & 0xF00 | 16 * ((uint16_t)v25 >> 12) | ((uint16_t)v25 >> 12);
       ++v27;
-      v29 = __builtin_bswap32(*v21);
+      v29 = bswap32(*v21);
       v21 += 2;
       v30 = v29;
       v31 = 16 * ((v29 >> 16) & 0xF00) | (v29 >> 16) & 0xF00 | 16 * (v29 >> 28) | (v29 >> 28);

src/Glide64/TexLoad8b.h

     do
     {
       v11 = v10;
-      v12 = __builtin_bswap32(*(uint32_t *)v7);
+      v12 = bswap32(*(uint32_t *)v7);
       v13 = (uint32_t *)(v7 + 4);
       ALOWORD(v10) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 15) & 0x1FE)), 1);
       v14 = v10 << 16;
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 7) & 0x1FE)), 1);
       *v15 = v14;
       ++v15;
-      v16 = __builtin_bswap32(*v13);
+      v16 = bswap32(*v13);
       v7 = (uint8_t *)(v13 + 1);
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 15) & 0x1FE)), 1);
       v14 <<= 16;
     do
     {
       v20 = v19;
-      v21 = __builtin_bswap32(v17[1]);
+      v21 = bswap32(v17[1]);
       ALOWORD(v19) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 15) & 0x1FE)), 1);
       v22 = v19 << 16;
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 23) & 0x1FE)), 1);
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 7) & 0x1FE)), 1);
       *v23 = v22;
       ++v23;
-      v24 = __builtin_bswap32(*v17);
+      v24 = bswap32(*v17);
       v17 = (uint32_t *)&src[((uintptr_t)v17 + 8 - (uintptr_t)src) & 0x7FF];
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 15) & 0x1FE)), 1);
       v22 <<= 16;
     do
     {
       v11 = v10;
-      v12 = __builtin_bswap32(*v7);
+      v12 = bswap32(*v7);
       v13 = v7 + 1;
       ALOWORD(v10) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 15) & 0x1FE)), 8);
       v14 = v10 << 16;
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v12 >> 7) & 0x1FE)), 8);
       *v15 = v14;
       ++v15;
-      v16 = __builtin_bswap32(*v13);
+      v16 = bswap32(*v13);
       v7 = v13 + 1;
       ALOWORD(v14) = __ROR__(*(uint16_t *)((char *)pal + ((v16 >> 15) & 0x1FE)), 8);
       v14 <<= 16;
     do
     {
       v20 = v19;
-      v21 = __builtin_bswap32(v17[1]);
+      v21 = bswap32(v17[1]);
       ALOWORD(v19) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 15) & 0x1FE)), 8);
       v22 = v19 << 16;
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 23) & 0x1FE)), 8);
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v21 >> 7) & 0x1FE)), 8);
       *v23 = v22;
       ++v23;
-      v24 = __builtin_bswap32(*v17);
+      v24 = bswap32(*v17);
       v17 += 2;
       ALOWORD(v22) = __ROR__(*(uint16_t *)((char *)pal + ((v24 >> 15) & 0x1FE)), 8);
       v22 <<= 16;

src/Glide64/Util.h

 			lx = lc; \
 		}
 
+#if defined(__GNUC__)
+  #define bswap32(x) __builtin_bswap32(x)
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+  #include <stdlib.h>
+  #define bswap32(x) _byteswap_ulong(x)
+#else
+static inline uint32_t bswap32(uint32_t val)
+{
+	return (((val & 0xff000000) >> 24) |
+		((val & 0x00ff0000) >>  8) |
+		((val & 0x0000ff00) <<  8) |
+		((val & 0x000000ff) << 24));
+}
+#endif
+
 // ALOWORD(x): lvalue for the uint16_t located at the address of x, i.e. the
 // first two bytes of x's storage. It can be read from and assigned to.
 // NOTE(review): those two bytes are the low-order word only on a
 // little-endian target -- confirm before building for a big-endian CPU.
+#define ALOWORD(x)   (*((uint16_t*)&(x)))   // low word
+
+template<class T> static inline T __ROR__(T value, unsigned int count)
+{
+  const unsigned int nbits = sizeof(T) * 8;
+  count %= nbits;
+
+  T low = value << (nbits - count);
+  value >>= count;
+  value |= low;
+  return value;
+}
+
+// rotate left
+template<class T> static T __ROL__(T value, unsigned int count)
+{
+  const unsigned int nbits = sizeof(T) * 8;
+  count %= nbits;
+
+  T high = value >> (nbits - count);
+  value <<= count;
+  value |= high;
+  return value;
+}
+
 #endif  // ifndef Util_H

src/Glide64/rdp.cpp

   }
 }
 
-// rotate left
-template<class T> static T __ROL__(T value, unsigned int count)
-{
-  const unsigned int nbits = sizeof(T) * 8;
-  count %= nbits;
-
-  T high = value >> (nbits - count);
-  value <<= count;
-  value |= high;
-  return value;
-}
-
 extern "C" void asmLoadBlock(uint32_t *src, uint32_t *dst, uint32_t off, int dxt, int cnt, int swp);
 
 static inline void loadBlock(uint32_t *src, uint32_t *dst, uint32_t off, int dxt, int cnt)
     while ( v9 );
     v12 = *v11;
     v7 = v11 + 1;
-    *v5 = __builtin_bswap32(v12);
+    *v5 = bswap32(v12);
     ++v5;
     v6 = cnt - 1;
     if ( cnt != 1 )
 LABEL_23:
       do
       {
-        *v5 = __builtin_bswap32(*v7);
-        v5[1] = __builtin_bswap32(v7[1]);
+        *v5 = bswap32(*v7);
+        v5[1] = bswap32(v7[1]);
         v7 += 2;
         v5 += 2;
         --v6;
       while ( v15 );
       v18 = *v17;
       v13 = v17 + 1;
-      *v7 = __builtin_bswap32(v18);
+      *v7 = bswap32(v18);
       ++v7;
       --v8;
       if ( v8 )
 LABEL_20:
         do
         {
-          *v7 = __builtin_bswap32(*v13);
-          v7[1] = __builtin_bswap32(v13[1]);
+          *v7 = bswap32(*v13);
+          v7[1] = bswap32(v13[1]);
           v13 += 2;
           v7 += 2;
           --v8;

src/GlideHQ/TxUtil.cpp

 /*
  * Return the index of the lowest set bit of `num`.
  *
  * For a power of two this is exactly log2(num); for any other non-zero
  * value it is the number of trailing zero bits, not a rounded logarithm.
  * Returns 0 when `num` is 0, for which no bit index exists.
  */
 int
 TxUtil::log2(int num)
 {
   // __builtin_ctz has an undefined result for 0 and the loop below would
   // never terminate, so zero is answered up front.
   if (num == 0)
     return 0;

 #if defined(__GNUC__)
   return __builtin_ctz(num);
 #else
   // Portable equivalent of __builtin_ctz, so every compiler returns the
   // same value for the same input: shift right until the lowest bit is set.
   unsigned int bits = (unsigned int)num;
   int shift = 0;

   while ((bits & 1u) == 0) {
     bits >>= 1;
     ++shift;
   }
   return shift;
 #endif
 }
 
 int