Bryan O'Sullivan committed 876c426

Transplant UTF-8 decoding benchmarks as of 44d20dca8f35

Comments (0)

Files changed (3)

tests/benchmarks/src/Data/Text/Benchmarks.hs

     -- Traditional benchmarks
     bs <- sequence
         [ Builder.benchmark
-        , DecodeUtf8.benchmark (tf "russian.txt")
+        , DecodeUtf8.benchmark "html" (tf "libya-chinese.html")
+        , DecodeUtf8.benchmark "xml" (tf "yiwiki.xml")
+        , DecodeUtf8.benchmark "ascii" (tf "ascii.txt")
+        , DecodeUtf8.benchmark "russian" (tf "russian.txt")
+        , DecodeUtf8.benchmark "japanese" (tf "japanese.txt")
         , EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯"
         , Equality.benchmark (tf "japanese.txt")
         , FileRead.benchmark (tf "russian.txt")

tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs

+{-# LANGUAGE ForeignFunctionInterface #-}
+
 -- | Test decoding of UTF-8
 --
 -- Tested in this benchmark:
     ( benchmark
     ) where
 
-import Criterion (Benchmark, bgroup, bench, nf)
+import Foreign.C.Types (CInt, CSize)
+import Data.ByteString.Internal (ByteString(..))
+import Foreign.Ptr (Ptr, plusPtr)
+import Foreign.ForeignPtr (withForeignPtr)
+import Data.Word (Word8)
+import qualified Criterion as C
+import Criterion (Benchmark, bgroup, nf)
 import qualified Codec.Binary.UTF8.Generic as U8
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TL
 
-benchmark :: FilePath -> IO Benchmark
-benchmark fp = do
+benchmark :: String -> FilePath -> IO Benchmark  -- first argument: label appended to every benchmark name; second: input file path
+benchmark kind fp = do
     bs  <- B.readFile fp  -- file contents via Data.ByteString (strict)
     lbs <- BL.readFile fp  -- same file read again via Data.ByteString.Lazy
+    let bench name = C.bench (name ++ "+" ++ kind)  -- local wrapper around C.bench: names become "<name>+<kind>"
     return $ bgroup "DecodeUtf8"
         [ bench "Strict" $ nf T.decodeUtf8 bs
+        , bench "IConv" $ iconv bs  -- IO action from iconv below, which forwards the strict bytes to time_iconv
         , bench "StrictLength" $ nf (T.length . T.decodeUtf8) bs
         , bench "StrictInitLength" $ nf (T.length . T.init . T.decodeUtf8) bs
         , bench "Lazy" $ nf TL.decodeUtf8 lbs
         , bench "LazyStringUtf8" $ nf U8.toString lbs
         , bench "LazyStringUtf8Length" $ nf (length . U8.toString) lbs
         ]
+
+iconv :: ByteString -> IO CInt  -- passes the ByteString's underlying buffer to the foreign routine time_iconv and returns its CInt result
+iconv (PS fp off len) = withForeignPtr fp $ \ptr ->  -- unpack the PS constructor; the raw pointer is used only inside this callback
+                        time_iconv (ptr `plusPtr` off) (fromIntegral len)  -- pointer advanced by off; len converted to the CSize that time_iconv expects
+
+foreign import ccall unsafe time_iconv :: Ptr Word8 -> CSize -> IO CInt  -- C symbol time_iconv; NOTE(review): implementation not visible here, presumably cbits/time_iconv.c added to the cabal c-sources — confirm what the CInt result means

tests/benchmarks/text-benchmarks.cabal

 executable text-benchmarks
   hs-source-dirs: src ../..
   c-sources:      ../../cbits/cbits.c
+                  cbits/time_iconv.c
   main-is:        Data/Text/Benchmarks.hs
   ghc-options:    -Wall -O2
   cpp-options:    -DHAVE_DEEPSEQ
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.