Bryan O'Sullivan avatar Bryan O'Sullivan committed 416df53 Merge

Merge pull request #1 from vincenthz/master

speedup encode and decode

Comments (0)

Files changed (3)

Data/ByteString/Base16.hs

-{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BangPatterns, MagicHash #-}
 
 -- |
 -- Module      : Data.ByteString.Base16
     , decode
     ) where
 
-import Control.Monad (forM_)
-import Data.Bits ((.&.), shiftL, shiftR)
 import Data.ByteString.Char8 (empty)
 import Data.ByteString.Internal (ByteString(..), createAndTrim', unsafeCreate)
-import Data.Word (Word8)
+import Data.Bits (shiftL)
 import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
-import Foreign.Marshal.Alloc (mallocBytes)
 import Foreign.Ptr (Ptr, minusPtr, plusPtr)
 import Foreign.Storable (peek, poke)
 import System.IO.Unsafe (unsafePerformIO)
+import GHC.Prim
+import GHC.Types
+import GHC.Word
 
 -- | Encode a string into base16 form.  The result will always be a
 -- multiple of 2 bytes in length.
     go s d | s == e = return ()
            | otherwise = do
       x <- peek8 s
-      poke d =<< (peek (digits `plusPtr` (x `shiftR` 4)) :: IO Word8)
-      poke (d `plusPtr` 1) =<< (peek (digits `plusPtr` (x .&. 0xf)) :: IO Word8)
+      poke d (tlookup tableHi x)
+      poke (d `plusPtr` 1) (tlookup tableLo x)
       go (s `plusPtr` 1) (d `plusPtr` 2)
-  digits :: Ptr Word8
-  !digits = unsafePerformIO $ do
-             ptr <- mallocBytes 16
-             forM_ (zip [0..] "0123456789abcdef") $ \(i,c) ->
-               poke (ptr `plusPtr` i) ((fromIntegral (fromEnum c)) :: Word8)
-             return ptr
-  {-# NOINLINE digits #-}
+    tlookup :: Addr# -> Int -> Word8
+    tlookup table (I# index) = W8# (indexWord8OffAddr# table index)
+    !tableLo =
+      "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
+      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66"#
+    !tableHi =
+      "\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\
+      \\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\
+      \\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\
+      \\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\
+      \\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\
+      \\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\
+      \\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\
+      \\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\
+      \\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\
+      \\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\
+      \\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\
+      \\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\
+      \\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\
+      \\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\
+      \\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\
+      \\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66"#
 
 -- | Decode a string from base16 form. The first element of the
 -- returned tuple contains the decoded data. The second element starts
     go s d | s == e = let len = e `minusPtr` sptr
                       in return (0, len `div` 2, ps sfp (soff+len) (slen-len))
            | otherwise = do
-      let hex w
-              | w >= 48 && w <= 57  = w - 48
-              | w >= 97 && w <= 102 = w - 97 + 10
-              | w >= 65 && w <= 70  = w - 65 + 10
-              | otherwise           = 0xff
       hi <- hex `fmap` peek8 s
       lo <- hex `fmap` peek8 (s `plusPtr` 1)
       if lo == 0xff || hi == 0xff
           poke d . fromIntegral $ lo + (hi `shiftL` 4)
           go (s `plusPtr` 2) (d `plusPtr` 1)
 
+    hex (I# index) = W8# $ indexWord8OffAddr# table index
+    !table =
+        "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\
+        \\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
+        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
+
 peek8 :: Ptr Word8 -> IO Int
 peek8 p = fromIntegral `fmap` peek p
 

base16-bytestring.cabal

   
   build-depends:
     base == 4.*,
-    bytestring == 0.9.*
+    bytestring == 0.9.*,
+    ghc-prim
 
   ghc-options: -Wall -funbox-strict-fields
   ghc-prof-options: -auto-all

benchmarks/Benchmarks.hs

          , bench "1024" $ whnf B16.encode (generate 1024)
          , bench "65536" $ whnf B16.encode (generate 65536)
          ]
+       , bgroup "decode" [
+           bench "8" $ whnf (B16.decode . B16.encode) (generate 8)
+         , bench "32" $ whnf (B16.decode . B16.encode) (generate 32)
+         , bench "128" $ whnf (B16.decode . B16.encode) (generate 128)
+         , bench "1024" $ whnf (B16.decode . B16.encode) (generate 1024)
+         , bench "65536" $ whnf (B16.decode . B16.encode) (generate 65536)
+         ]
        ]
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.