Commits

ecsv committed c702188

Remove mini assembler code for byte swapping

The byte swapping was implemented with inline assembler code based on the bswap
instruction. This was less portable and even resulted in build failures when
compiling for OSX. It also slightly reduced the effectiveness of the optimizer.

Instead, the compiler's builtin functions can be used to do the same.

  • Participants
  • Parent commits 195fe6c

Comments (0)

Files changed (2)

File src/Main.cpp

   for (i=0; i<0x200; i++)
   {
     // cur = ~*(data++), byteswapped
-#if !defined(__GNUC__) && !defined(NO_ASM)
-    __asm {
-      mov eax, dword ptr [data]
-        mov ecx, dword ptr [eax]
-        add eax, 4
-        mov dword ptr [data], eax
-        not ecx
-        bswap ecx
-        mov dword ptr [cur],ecx
-    }
-#elif !defined(NO_ASM)
-     asm volatile ("bswap %[cur]"
-           : [cur] "=g"(cur)
-           : "[cur]"(~*(data++))
-           );
+#if !defined(__GNUC__)
+     cur = _byteswap_ulong(~*(data++));
+#else
+     cur = __builtin_bswap32(~*(data++));
 #endif
 
     for (b=0x80000000; b!=0; b>>=1)
 
 int drawFlag = 1;	// draw flag for rendering callback
 
-#if defined(WIN32) || defined(NO_ASM)
-  #define BYTESWAP1(s1) s1 = ((s1 & 0xff) << 24) | ((s1 & 0xff00) << 8) | ((s1 & 0xff0000) >> 8) | ((s1 & 0xff000000) >> 24);
-  #define BYTESWAP2(s1,s2) s1 = ((s1 & 0xff) << 24) | ((s1 & 0xff00) << 8) | ((s1 & 0xff0000) >> 8) | ((s1 & 0xff000000) >> 24); \
-  s2 = ((s2 & 0xff) << 24) | ((s2 & 0xff00) << 8) | ((s2 & 0xff0000) >> 8) | ((s2 & 0xff000000) >> 24);
+#if defined(__GNUC__)
+  #define bswap32(x) __builtin_bswap32(x)
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+  #include <stdlib.h>
+  #define bswap32(x) _byteswap_ulong(x)
 #else
-  #define BYTESWAP1(s1) asm volatile (" bswap %0; " : "+r" (s1) : :);
-  #define BYTESWAP2(s1,s2) asm volatile (" bswap %0; bswap %1; " : "+r" (s1), "+r" (s2) : :);
+static inline uint32_t bswap32(uint32_t val)
+{
+        return (((val & 0xff000000) >> 24) |
+                ((val & 0x00ff0000) >>  8) |
+                ((val & 0x0000ff00) <<  8) |
+                ((val & 0x000000ff) << 24));
+}
 #endif
 
 // global strings
         int *pSrc = (int *) ((uintptr_t) gfx.RDRAM + SrcOffs);
         for (unsigned int x = 0; x < cnt; x++)
         {
-            int s1 = *pSrc++;
-            int s2 = *pSrc++;
-            BYTESWAP2(s1, s2)
+            int s1 = bswap32(*pSrc++);
+            int s2 = bswap32(*pSrc++);
             *pDst++ = s1;
             *pDst++ = s2;
         }
         // set source pointer to 4-byte aligned RDRAM location before the start
         int *pSrc = (int *) ((uintptr_t) gfx.RDRAM + (SrcOffs & 0xfffffffc));
         // do the first partial 32-bit word
-        int s0 = *pSrc++;
-        BYTESWAP1(s0)
+        int s0 = bswap32(*pSrc++);
         for (int x = 0; x < rem; x++)
             s0 >>= 8;
         for (int x = 4; x > rem; x--)
             s0 >>= 8;
         }
         // do one full 32-bit word
-        s0 = *pSrc++;
-        BYTESWAP1(s0)
+        s0 = bswap32(*pSrc++);
         *pDst++ = s0;
         // do 'cnt-1' 64-bit dwords
         for (unsigned int x = 0; x < cnt-1; x++)
         {
-            int s1 = *pSrc++;
-            int s2 = *pSrc++;
-            BYTESWAP2(s1, s2)
+            int s1 = bswap32(*pSrc++);
+            int s2 = bswap32(*pSrc++);
             *pDst++ = s1;
             *pDst++ = s2;
         }
         // do last partial 32-bit word
-        s0 = *pSrc++;
-        BYTESWAP1(s0)
+        s0 = bswap32(*pSrc++);
         for (; rem > 0; rem--)
         {
             *((char *) pDst) = s0 & 0xff;