-- Source path: text/benchmarks/haskell/Benchmarks/DecodeUtf8.hs
--
-- Repository viewer notice: the default branch has multiple heads.

{-# LANGUAGE ForeignFunctionInterface #-}

-- | Test decoding of UTF-8.
--
-- Tested in this benchmark:
--
-- * Decoding bytes using UTF-8
--
-- In some tests:
--
-- * Taking the length of the result
--
-- * Taking the init of the result
--
-- The latter are used for testing stream fusion.
--
module Benchmarks.DecodeUtf8
    ( benchmark
    ) where

import Foreign.C.Types
import Data.ByteString.Internal (ByteString(..))
import Foreign.Ptr (Ptr, plusPtr)
import Foreign.ForeignPtr (withForeignPtr)
import Data.Word (Word8)
import qualified Criterion as C
import Criterion (Benchmark, bgroup, nf)
import qualified Codec.Binary.UTF8.Generic as U8
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

-- | Build the @DecodeUtf8@ benchmark group for the file at @fp@.
--
-- The file is read twice: once as a strict byte string and once as a lazy
-- one, so that the strict and lazy decoders are each fed their native input
-- type.
--
-- @kind@ is appended to every benchmark name after a @+@ separator, which
-- lets the caller tell apart runs over different input files.
benchmark :: String -> FilePath -> IO Benchmark
benchmark kind fp = do
    bs  <- B.readFile fp
    lbs <- BL.readFile fp
    let bench name = C.bench (name ++ "+" ++ kind)
    return $ bgroup "DecodeUtf8"
        [ -- Strict decoding, fully evaluated.
          bench "Strict" $ nf T.decodeUtf8 bs
          -- Foreign baseline: passes the strict bytes to 'iconv'.
        , bench "IConv" $ iconv bs
          -- Decoding followed by length / init . length (see the module
          -- header: these variants are there to exercise stream fusion).
        , bench "StrictLength" $ nf (T.length . T.decodeUtf8) bs
        , bench "StrictInitLength" $ nf (T.length . T.init . T.decodeUtf8) bs
          -- The same three shapes over the lazy input.
        , bench "Lazy" $ nf TL.decodeUtf8 lbs
        , bench "LazyLength" $ nf (TL.length . TL.decodeUtf8) lbs
        , bench "LazyInitLength" $ nf (TL.length . TL.init . TL.decodeUtf8) lbs
          -- Decoding to String via Codec.Binary.UTF8.Generic, for comparison.
        , bench "StrictStringUtf8" $ nf U8.toString bs
        , bench "StrictStringUtf8Length" $ nf (length . U8.toString) bs
        , bench "LazyStringUtf8" $ nf U8.toString lbs
        , bench "LazyStringUtf8Length" $ nf (length . U8.toString) lbs
        ]

-- | Pass the bytes of a strict 'ByteString' to the foreign 'time_iconv'
-- routine and return the 'CInt' it yields.
--
-- The byte string is taken apart into its foreign pointer, offset and
-- length; the pointer handed to C is advanced by the offset and the length
-- is converted to 'CSize'.
iconv :: ByteString -> IO CInt
iconv (PS payload offset size) = withForeignPtr payload convert
  where
    -- Runs inside 'withForeignPtr', so the call happens while the raw
    -- pointer is in scope.
    convert base = time_iconv (base `plusPtr` offset) (fromIntegral size)

-- | Binding to the C symbol @time_iconv@, imported as an @unsafe@ call.
--
-- Takes a pointer to a byte buffer and a byte count, and returns a C @int@.
-- NOTE(review): the C implementation is not visible from this module;
-- presumably it runs an iconv conversion over the buffer and the result is
-- a status code -- confirm against the C source.
foreign import ccall unsafe "time_iconv"
    time_iconv :: Ptr Word8 -> CSize -> IO CInt
-- The remaining lines are repository-viewer tips captured with the file;
-- they are not part of the module.
--
-- Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
-- Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
-- Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
-- Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
-- Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
-- Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
-- Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.