Commits

Jasper Van der Jeugt  committed 821114f

Migrate benchmark suite to criterion/cabal

  • Participants
  • Parent commits 4eeea1d

Comments (0)

Files changed (56)

File tests/Benchmarks.hs

-{-# LANGUAGE BangPatterns, GADTs, MagicHash #-}
-
-import qualified Data.ByteString.Char8 as BS
-import qualified Data.ByteString.Lazy.Char8 as BL
-import qualified Data.ByteString.Lazy.Internal as BL
-import Control.Monad.Trans (liftIO)
-import Control.Exception (evaluate)
-import Criterion.Main
-import Data.Char
-import Data.Monoid (mappend, mempty)
-import qualified Data.ByteString.UTF8 as UTF8
-import qualified Data.Text as TS
-import qualified Data.Text.IO as TS
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Builder as TB
-import qualified Data.Text.Lazy.IO as TL
-import qualified Data.List as L
-import qualified Data.Text.Encoding as TS
-import qualified Data.Text.Lazy.Encoding as TL
-import qualified Criterion.MultiMap as M
-import Control.DeepSeq
-import Criterion.Config
-import GHC.Base
-
-
-myConfig
-    | False     = defaultConfig {
-                    -- Always display an 800x600 window.
-                    cfgPlot = M.singleton KernelDensity (Window 800 600)
-                  }
-    | otherwise = defaultConfig
-
-instance NFData BS.ByteString
-
-instance NFData BL.ByteString where
-    rnf BL.Empty        = ()
-    rnf (BL.Chunk _ ts) = rnf ts
-
-data B where
-    B :: NFData a => a -> B
-
-instance NFData B where
-    rnf (B b) = rnf b
-
-main = do
-  let dataFile = "text/test/russian.txt"
-  bsa <- BS.readFile dataFile
-  let tsa     = TS.decodeUtf8 bsa
-      tsb     = TS.toUpper tsa
-      tla     = TL.fromChunks (TS.chunksOf 16376 tsa)
-      tlb     = TL.fromChunks (TS.chunksOf 16376 tsb)
-      bsb     = TS.encodeUtf8 tsb
-      bla     = BL.fromChunks (chunksOf 16376 bsa)
-      blb     = BL.fromChunks (chunksOf 16376 bsb)
-      bsa_len = BS.length bsa
-      tsa_len = TS.length tsa
-      bla_len = BL.length bla
-      tla_len = TL.length tla
-      la      = UTF8.toString bsa
-      la_len  = L.length la
-      tsb_len = TS.length tsb
-      lb      = TS.unpack tsb
-      bsl     = BS.lines bsa
-      bll     = BL.lines bla
-      tsl     = TS.lines tsa
-      tll     = TL.lines tla
-      ll      = L.lines la
-  defaultMainWith
-    myConfig
-    (liftIO . evaluate $
-     rnf [B tsa, B tsb, B tla, B tlb, B bsa, B bsb, B bla, B blb,
-          B bsa_len, B tsa_len, B bla_len, B tla_len, B la, B la_len,
-          B tsb_len, B lb, B bsl, B bll, B tsl, B tll, B ll])
-    [
-      bgroup "append" [
-        bench "ts" $ nf (TS.append tsb) tsa
-      , bench "tl" $ nf (TL.append tlb) tla
-      , bench "bs" $ nf (BS.append bsb) bsa
-      , bench "bl" $ nf (BL.append blb) bla
-      , bench "l" $ nf ((++) lb) la
-      ],
-      bgroup "concat" [
-        bench "ts" $ nf TS.concat tsl
-      , bench "tl" $ nf TL.concat tll
-      , bench "bs" $ nf BS.concat bsl
-      , bench "bl" $ nf BL.concat bll
-      , bench "l" $ nf L.concat ll
-      ],
-      bgroup "cons" [
-        bench "ts" $ nf (TS.cons c) tsa
-      , bench "tl" $ nf (TL.cons c) tla
-      , bench "bs" $ nf (BS.cons c) bsa
-      , bench "bl" $ nf (BL.cons c) bla
-      , bench "l" $ nf (c:) la
-      ],
-      bgroup "concatMap" [
-        bench "ts" $ nf (TS.concatMap (TS.replicate 3 . TS.singleton)) tsa
-      , bench "tl" $ nf (TL.concatMap (TL.replicate 3 . TL.singleton)) tla
-      , bench "bs" $ nf (BS.concatMap (BS.replicate 3)) bsa
-      , bench "bl" $ nf (BL.concatMap (BL.replicate 3)) bla
-      , bench "l" $ nf (L.concatMap (L.replicate 3 . (:[]))) la
-      ],
-      bgroup "decode" [
-        bench "ts" $ nf TS.decodeUtf8 bsa
-      , bench "tl" $ nf TL.decodeUtf8 bla
-      , bench "bs" $ nf BS.unpack bsa
-      , bench "bl" $ nf BL.unpack bla
-      , bench "l" $ nf UTF8.toString bsa
-      ],
-      bgroup "drop" [
-        bench "ts" $ nf (TS.drop (tsa_len `div` 3)) tsa
-      , bench "tl" $ nf (TL.drop (tla_len `div` 3)) tla
-      , bench "bs" $ nf (BS.drop (bsa_len `div` 3)) bsa
-      , bench "bl" $ nf (BL.drop (bla_len `div` 3)) bla
-      , bench "l" $ nf (L.drop (la_len `div` 3)) la
-      ],
-      bgroup "encode" [
-        bench "ts" $ nf TS.encodeUtf8 tsa
-      , bench "tl" $ nf TL.encodeUtf8 tla
-      , bench "bs" $ nf BS.pack la
-      , bench "bl" $ nf BL.pack la
-      , bench "l" $ nf UTF8.fromString la
-      ],
-      bgroup "filter" [
-        bench "ts" $ nf (TS.filter p0) tsa
-      , bench "tl" $ nf (TL.filter p0) tla
-      , bench "bs" $ nf (BS.filter p0) bsa
-      , bench "bl" $ nf (BL.filter p0) bla
-      , bench "l" $ nf (L.filter p0) la
-      ],
-      bgroup "filter.filter" [
-        bench "ts" $ nf (TS.filter p1 . TS.filter p0) tsa
-      , bench "tl" $ nf (TL.filter p1 . TL.filter p0) tla
-      , bench "bs" $ nf (BS.filter p1 . BS.filter p0) bsa
-      , bench "bl" $ nf (BL.filter p1 . BL.filter p0) bla
-      , bench "l" $ nf (L.filter p1 . L.filter p0) la
-      ],
-      bgroup "foldl'" [
-        bench "ts" $ nf (TS.foldl' len 0) tsa
-      , bench "tl" $ nf (TL.foldl' len 0) tla
-      , bench "bs" $ nf (BS.foldl' len 0) bsa
-      , bench "bl" $ nf (BL.foldl' len 0) bla
-      , bench "l" $ nf (L.foldl' len 0) la
-      ],
-      bgroup "foldr" [
-        bench "ts" $ nf (L.length . TS.foldr (:) []) tsa
-      , bench "tl" $ nf (L.length . TL.foldr (:) []) tla
-      , bench "bs" $ nf (L.length . BS.foldr (:) []) bsa
-      , bench "bl" $ nf (L.length . BL.foldr (:) []) bla
-      , bench "l" $ nf (L.length . L.foldr (:) []) la
-      ],
-      bgroup "head" [
-        bench "ts" $ nf TS.head tsa
-      , bench "tl" $ nf TL.head tla
-      , bench "bs" $ nf BS.head bsa
-      , bench "bl" $ nf BL.head bla
-      , bench "l" $ nf L.head la
-      ],
-      bgroup "init" [
-        bench "ts" $ nf TS.init tsa
-      , bench "tl" $ nf TL.init tla
-      , bench "bs" $ nf BS.init bsa
-      , bench "bl" $ nf BL.init bla
-      , bench "l" $ nf L.init la
-      ],
-      bgroup "intercalate" [
-        bench "ts" $ nf (TS.intercalate tsw) tsl
-      , bench "tl" $ nf (TL.intercalate tlw) tll
-      , bench "bs" $ nf (BS.intercalate bsw) bsl
-      , bench "bl" $ nf (BL.intercalate blw) bll
-      , bench "l" $ nf (L.intercalate lw) ll
-      ],
-      bgroup "intersperse" [
-        bench "ts" $ nf (TS.intersperse c) tsa
-      , bench "tl" $ nf (TL.intersperse c) tla
-      , bench "bs" $ nf (BS.intersperse c) bsa
-      , bench "bl" $ nf (BL.intersperse c) bla
-      , bench "l" $ nf (L.intersperse c) la
-      ],
-      bgroup "isInfixOf" [
-        bench "ts" $ nf (TS.isInfixOf tsw) tsa
-      , bench "tl" $ nf (TL.isInfixOf tlw) tla
-      , bench "bs" $ nf (BS.isInfixOf bsw) bsa
-        -- no isInfixOf for lazy bytestrings
-      , bench "l" $ nf (L.isInfixOf lw) la
-      ],
-      bgroup "last" [
-        bench "ts" $ nf TS.last tsa
-      , bench "tl" $ nf TL.last tla
-      , bench "bs" $ nf BS.last bsa
-      , bench "bl" $ nf BL.last bla
-      , bench "l" $ nf L.last la
-      ],
-      bgroup "map" [
-        bench "ts" $ nf (TS.map f) tsa
-      , bench "tl" $ nf (TL.map f) tla
-      , bench "bs" $ nf (BS.map f) bsa
-      , bench "bl" $ nf (BL.map f) bla
-      , bench "l" $ nf (L.map f) la
-      ],
-      bgroup "mapAccumL" [
-        bench "ts" $ nf (TS.mapAccumL g 0) tsa
-      , bench "tl" $ nf (TL.mapAccumL g 0) tla
-      , bench "bs" $ nf (BS.mapAccumL g 0) bsa
-      , bench "bl" $ nf (BL.mapAccumL g 0) bla
-      , bench "l" $ nf (L.mapAccumL g 0) la
-      ],
-      bgroup "mapAccumR" [
-        bench "ts" $ nf (TS.mapAccumR g 0) tsa
-      , bench "tl" $ nf (TL.mapAccumR g 0) tla
-      , bench "bs" $ nf (BS.mapAccumR g 0) bsa
-      , bench "bl" $ nf (BL.mapAccumR g 0) bla
-      , bench "l" $ nf (L.mapAccumR g 0) la
-      ],
-      bgroup "map.map" [
-        bench "ts" $ nf (TS.map f . TS.map f) tsa
-      , bench "tl" $ nf (TL.map f . TL.map f) tla
-      , bench "bs" $ nf (BS.map f . BS.map f) bsa
-      , bench "bl" $ nf (BL.map f . BL.map f) bla
-      , bench "l" $ nf (L.map f . L.map f) la
-      ],
-      bgroup "readFile" [
-        bench "ts" $ TS.readFile dataFile
-      , bench "tl" $ nfIO (TL.readFile dataFile)
-      , bench "bs" $ BS.readFile dataFile
-      , bench "bl" $ nfIO (BL.length `fmap` BL.readFile dataFile)
-      , bench "l" $ nfIO (length `fmap` readFile dataFile)
-      ],
-      bgroup "replicate char" [
-        bench "ts" $ nf (TS.replicate bsa_len) (TS.singleton c)
-      , bench "tl" $ nf (TL.replicate (fromIntegral bsa_len)) (TL.singleton c)
-      , bench "bs" $ nf (BS.replicate bsa_len) c
-      , bench "bl" $ nf (BL.replicate (fromIntegral bsa_len)) c
-      , bench "l" $ nf (L.replicate bsa_len) c
-      ],
-      bgroup "replicate string" [
-        bench "ts" $ nf (TS.replicate (bsa_len `div` TS.length tsw)) tsw
-      , bench "tl" $ nf (TL.replicate (fromIntegral bsa_len `div` TL.length tlw)) tlw
-      , bench "l" $ nf (replicat (bsa_len `div` TS.length tsw)) lw
-      ],
-      bgroup "reverse" [
-        bench "ts" $ nf TS.reverse tsa
-      , bench "tl" $ nf TL.reverse tla
-      , bench "bs" $ nf BS.reverse bsa
-      , bench "bl" $ nf BL.reverse bla
-      , bench "l" $ nf L.reverse la
-      ],
-      bgroup "take" [
-        bench "ts" $ nf (TS.take (tsa_len `div` 3)) tsa
-      , bench "tl" $ nf (TL.take (tla_len `div` 3)) tla
-      , bench "bs" $ nf (BS.take (bsa_len `div` 3)) bsa
-      , bench "bl" $ nf (BL.take (bla_len `div` 3)) bla
-      , bench "l" $ nf (L.take (la_len `div` 3)) la
-      ],
-      bgroup "tail" [
-        bench "ts" $ nf TS.tail tsa
-      , bench "tl" $ nf TL.tail tla
-      , bench "bs" $ nf BS.tail bsa
-      , bench "bl" $ nf BL.tail bla
-      , bench "l" $ nf L.tail la
-      ],
-      bgroup "toLower" [
-        bench "ts" $ nf TS.toLower tsa
-      , bench "tl" $ nf TL.toLower tla
-      , bench "bs" $ nf (BS.map toLower) bsa
-      , bench "bl" $ nf (BL.map toLower) bla
-      , bench "l" $ nf (L.map toLower) la
-      ],
-      bgroup "toUpper" [
-        bench "ts" $ nf TS.toUpper tsa
-      , bench "tl" $ nf TL.toUpper tla
-      , bench "bs" $ nf (BS.map toUpper) bsa
-      , bench "bl" $ nf (BL.map toUpper) bla
-      , bench "l" $ nf (L.map toUpper) la
-      ],
-      bgroup "words" [
-        bench "ts" $ nf TS.words tsa
-      , bench "tl" $ nf TL.words tla
-      , bench "bs" $ nf BS.words bsa
-      , bench "bl" $ nf BL.words bla
-      , bench "l" $ nf L.words la
-      ],
-      bgroup "zipWith" [
-        bench "ts" $ nf (TS.zipWith min tsb) tsa
-      , bench "tl" $ nf (TL.zipWith min tlb) tla
-      , bench "bs" $ nf (BS.zipWith min bsb) bsa
-      , bench "bl" $ nf (BL.zipWith min blb) bla
-      , bench "l" $ nf (L.zipWith min lb) la
-      ],
-      bgroup "length" [
-        bgroup "cons" [
-          bench "ts" $ nf (TS.length . TS.cons c) tsa
-        , bench "tl" $ nf (TL.length . TL.cons c) tla
-        , bench "bs" $ nf (BS.length . BS.cons c) bsa
-        , bench "bl" $ nf (BL.length . BL.cons c) bla
-        , bench "l" $ nf (L.length . (:) c) la
-        ],
-        bgroup "decode" [
-          bench "ts" $ nf (TS.length . TS.decodeUtf8) bsa
-        , bench "tl" $ nf (TL.length . TL.decodeUtf8) bla
-        , bench "bs" $ nf (L.length . BS.unpack) bsa
-        , bench "bl" $ nf (L.length . BL.unpack) bla
-        , bench "utf8-string" $ nf (L.length . UTF8.toString) bsa
-        ],
-        bgroup "drop" [
-          bench "ts" $ nf (TS.length . TS.drop (tsa_len `div` 3)) tsa
-        , bench "tl" $ nf (TL.length . TL.drop (tla_len `div` 3)) tla
-        , bench "bs" $ nf (BS.length . BS.drop (bsa_len `div` 3)) bsa
-        , bench "bl" $ nf (BL.length . BL.drop (bla_len `div` 3)) bla
-        , bench "l" $ nf (L.length . L.drop (la_len `div` 3)) la
-        ],
-        bgroup "filter" [
-          bench "ts" $ nf (TS.length . TS.filter p0) tsa
-        , bench "tl" $ nf (TL.length . TL.filter p0) tla
-        , bench "bs" $ nf (BS.length . BS.filter p0) bsa
-        , bench "bl" $ nf (BL.length . BL.filter p0) bla
-        , bench "l" $ nf (L.length . L.filter p0) la
-        ],
-        bgroup "filter.filter" [
-          bench "ts" $ nf (TS.length . TS.filter p1 . TS.filter p0) tsa
-        , bench "tl" $ nf (TL.length . TL.filter p1 . TL.filter p0) tla
-        , bench "bs" $ nf (BS.length . BS.filter p1 . BS.filter p0) bsa
-        , bench "bl" $ nf (BL.length . BL.filter p1 . BL.filter p0) bla
-        , bench "l" $ nf (L.length . L.filter p1 . L.filter p0) la
-        ],
-        bgroup "init" [
-          bench "ts" $ nf (TS.length . TS.init) tsa
-        , bench "tl" $ nf (TL.length . TL.init) tla
-        , bench "bs" $ nf (BS.length . BS.init) bsa
-        , bench "bl" $ nf (BL.length . BL.init) bla
-        , bench "l" $ nf (L.length . L.init) la
-        ],
-        bgroup "intercalate" [
-          bench "ts" $ nf (TS.length . TS.intercalate tsw) tsl
-        , bench "tl" $ nf (TL.length . TL.intercalate tlw) tll
-        , bench "bs" $ nf (BS.length . BS.intercalate bsw) bsl
-        , bench "bl" $ nf (BL.length . BL.intercalate blw) bll
-        , bench "l" $ nf (L.length . L.intercalate lw) ll
-        ],
-        bgroup "intersperse" [
-          bench "ts" $ nf (TS.length . TS.intersperse c) tsa
-        , bench "tl" $ nf (TL.length . TL.intersperse c) tla
-        , bench "bs" $ nf (BS.length . BS.intersperse c) bsa
-        , bench "bl" $ nf (BL.length . BL.intersperse c) bla
-        , bench "l" $ nf (L.length . L.intersperse c) la
-        ],
-        bgroup "map" [
-          bench "ts" $ nf (TS.length . TS.map f) tsa
-        , bench "tl" $ nf (TL.length . TL.map f) tla
-        , bench "bs" $ nf (BS.length . BS.map f) bsa
-        , bench "bl" $ nf (BL.length . BL.map f) bla
-        , bench "l" $ nf (L.length . L.map f) la
-        ],
-        bgroup "map.map" [
-          bench "ts" $ nf (TS.length . TS.map f . TS.map f) tsa
-        , bench "tl" $ nf (TL.length . TL.map f . TL.map f) tla
-        , bench "bs" $ nf (BS.length . BS.map f . BS.map f) bsa
-        , bench "l" $ nf (L.length . L.map f . L.map f) la
-        ],
-        bgroup "replicate char" [
-          bench "ts" $ nf (TS.length . TS.replicate bsa_len) (TS.singleton c)
-        , bench "tl" $ nf (TL.length . TL.replicate (fromIntegral bsa_len)) (TL.singleton c)
-        , bench "bs" $ nf (BS.length . BS.replicate bsa_len) c
-        , bench "bl" $ nf (BL.length . BL.replicate (fromIntegral bsa_len)) c
-        , bench "l" $ nf (L.length . L.replicate bsa_len) c
-        ],
-        bgroup "replicate string" [
-          bench "ts" $ nf (TS.length . TS.replicate (bsa_len `div` TS.length tsw)) tsw
-        , bench "tl" $ nf (TL.length . TL.replicate (fromIntegral bsa_len `div` TL.length tlw)) tlw
-        , bench "l" $ nf (L.length . replicat (bsa_len `div` TS.length tsw)) lw
-        ],
-        bgroup "take" [
-          bench "ts" $ nf (TS.length . TS.take (tsa_len `div` 3)) tsa
-        , bench "tl" $ nf (TL.length . TL.take (tla_len `div` 3)) tla
-        , bench "bs" $ nf (BS.length . BS.take (bsa_len `div` 3)) bsa
-        , bench "bl" $ nf (BL.length . BL.take (bla_len `div` 3)) bla
-        , bench "l" $ nf (L.length . L.take (la_len `div` 3)) la
-        ],
-        bgroup "tail" [
-          bench "ts" $ nf (TS.length . TS.tail) tsa
-        , bench "tl" $ nf (TL.length . TL.tail) tla
-        , bench "bs" $ nf (BS.length . BS.tail) bsa
-        , bench "bl" $ nf (BL.length . BL.tail) bla
-        , bench "l" $ nf (L.length . L.tail) la
-        ],
-        bgroup "toLower" [
-          bench "ts" $ nf (TS.length . TS.toLower) tsa
-        , bench "tl" $ nf (TL.length . TL.toLower) tla
-        , bench "bs" $ nf (BS.length . BS.map toLower) bsa
-        , bench "bl" $ nf (BL.length . BL.map toLower) bla
-        , bench "l" $ nf (L.length . L.map toLower) la
-        ],
-        bgroup "toUpper" [
-          bench "ts" $ nf (TS.length . TS.toUpper) tsa
-        , bench "tl" $ nf (TL.length . TL.toUpper) tla
-        , bench "bs" $ nf (BS.length . BS.map toUpper) bsa
-        , bench "bl" $ nf (BL.length . BL.map toUpper) bla
-        , bench "l" $ nf (L.length . L.map toUpper) la
-        ],
-        bgroup "words" [
-          bench "ts" $ nf (L.length . TS.words) tsa
-        , bench "tl" $ nf (L.length . TL.words) tla
-        , bench "bs" $ nf (L.length . BS.words) bsa
-        , bench "bl" $ nf (L.length . BL.words) bla
-        , bench "l" $ nf (L.length . L.words) la
-        ],
-        bgroup "zipWith" [
-          bench "ts" $ nf (TS.length . TS.zipWith min tsb) tsa
-        , bench "tl" $ nf (TL.length . TL.zipWith min tlb) tla
-        , bench "bs" $ nf (L.length . BS.zipWith min bsb) bsa
-        , bench "bl" $ nf (L.length . BL.zipWith min blb) bla
-        , bench "l" $ nf (L.length . L.zipWith min lb) la
-        ]
-      ],
-      bgroup "builder" [
-        bench "mappend char" $ nf (TL.length . TB.toLazyText . mappendNChar 'a') 10000,
-        bench "mappend 8 char" $ nf (TL.length . TB.toLazyText . mappend8Char) 'a',
-        bench "mappend text" $ nf (TL.length . TB.toLazyText . mappendNText short) 10000
-      ]
-    ]
-  where
-    c  = 'й'
-    p0 = (== c)
-    p1 = (/= 'д')
-    lw  = "право"
-    bsw  = UTF8.fromString lw
-    blw  = BL.fromChunks [bsw]
-    tsw  = TS.pack lw
-    tlw  = TL.fromChunks [tsw]
-    f (C# c#) = C# (chr# (ord# c# +# 1#))
-    g (I# i#) (C# c#) = (I# (i# +# 1#), C# (chr# (ord# c# +# i#)))
-    len l _ = l + (1::Int)
-    replicat n = concat . L.replicate n
-    short = TS.pack "short"
-
-chunksOf :: Int -> BS.ByteString -> [BS.ByteString]
-chunksOf k = go
-  where
-    go t = case BS.splitAt k t of
-             (a,b) | BS.null a -> []
-                   | otherwise -> a : go b
-
-mappendNChar :: Char -> Int -> TB.Builder
-mappendNChar c n = go 0
-  where
-    go i
-      | i < n     = TB.singleton c `mappend` go (i+1)
-      | otherwise = mempty
-
--- Gives more opportunity for inlining and elimination of unnecesary
--- bounds checks.
-mappend8Char :: Char -> TB.Builder
-mappend8Char c = TB.singleton c `mappend` TB.singleton c `mappend`
-                 TB.singleton c `mappend` TB.singleton c `mappend`
-                 TB.singleton c `mappend` TB.singleton c `mappend`
-                 TB.singleton c `mappend` TB.singleton c
-
-mappendNText :: TS.Text -> Int -> TB.Builder
-mappendNText t n = go 0
-  where
-    go i
-      | i < n     = TB.fromText t `mappend` go (i+1)
-      | otherwise = mempty

File tests/Makefile

 
 cabal := $(shell which cabal 2>/dev/null)
 
-all: bm qc coverage regressions
+all: qc coverage regressions
 
 lib: $(lib)
 
 regressions: Regressions.o TestUtils.o
 	$(ghc) $(ghc-test-flags) -o $@ $^ $(lib)
 
-Benchmarks.o: ghc-opt-flags = -O
-bm Benchmarks.o: ghc-flags += -package utf8-string
-bm: Benchmarks.o
-	$(ghc) $(ghc-flags) -o $@ $^ $(lib)
-
 SlowFunctions.o: ghc-opt-flags = -O2
 SearchBench.o: ghc-opt-flags = -O
 %.o: %.hs
 	curl -O http://projects.haskell.org/text/text-testdata.tar.bz2
 
 clean:
-	-rm -rf *.o *.hi *.tix bm qc qc-hpc stdio-hpc hpcdir .hpc coverage-html
+	-rm -rf *.o *.hi *.tix qc qc-hpc stdio-hpc hpcdir .hpc coverage-html

File tests/README

-You also need the test data from:
-http://projects.haskell.org/text/text-testdata.tar.bz2

File tests/README.markdown

+Tests
+=====
+
+This directory contains the tests for the Text library. To run these tests, you
+will need the test data from:
+
+    http://projects.haskell.org/text/text-testdata.tar.bz2
+
+You should extract that archive to the same directory as this README (some tests
+rely on this).
+
+There are two categories of tests: functional tests (including QuickCheck
+properties), and benchmarks.
+
+Functional tests
+----------------
+
+TODO
+
+Benchmarks
+----------
+
+The benchmarks are located in the `benchmarks` subdirectory. An overview of
+what's in that directory:
+
+    python            Python implementations of some benchmarks
+    ruby              Ruby implementations of some benchmarks
+    src               Source files of the Haskell benchmarks
+    benchmarks.cabal  Cabal file which compiles all benchmarks
+    Makefile          Has targets for common tasks
+
+To compile the benchmarks, navigate to the `benchmarks` subdirectory and run
+`cabal configure && cabal build`. Then, you can run the benchmarks using:
+
+    ./dist/build/benchmarks/benchmarks
+
+However, since there are quite a lot of benchmarks, you usually don't want to
+run them all. Instead, use the `-l` flag to get a list of benchmarks:
+
+    ./dist/build/benchmarks/benchmarks -l
+
+And run the ones you want to inspect.

File tests/benchmarks/.gitignore

+dist

File tests/benchmarks/CaseMap.hs

-import Control.Exception
-import Control.Monad
-import Data.Time.Clock
-import Data.ByteString as B
-import Data.Text.Encoding as T
-import Data.Text as T
-import System.Environment
-
-time act = do
-  start <- getCurrentTime
-  act
-  end <- getCurrentTime
-  let d = diffUTCTime end start
-  print d
-
-main = do
-  args <- getArgs
-  forM_ args $ \f -> do
-      t <- T.decodeUtf8 `fmap` B.readFile f
-      evaluate t
-      time $ evaluate (T.toUpper t)

File tests/benchmarks/Cut.hs

-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy.Char8 as BL
-import qualified Data.Text.IO as T
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
-import qualified Data.Text.Lazy.Encoding as TL
-import qualified Data.Text.Lazy.IO as TL
-import qualified Data.Text.Lazy as TL
-import Data.Int (Int64)
-import Numeric (readDec)
-import System.Environment (getArgs)
-
-bytestring file s e = do
-  t <- B.readFile file
-  B.putStr (cut t)
-  where
-    cut = B.unlines . map (B.take (e - s) . B.drop s) . B.lines
-
-lazyBytestring file s e = do
-  t <- BL.readFile file
-  BL.putStr (cut (fromIntegral s) (fromIntegral e) t)
-  where
-    cut s e = BL.unlines . map (BL.take (e - s) . BL.drop s) . BL.lines
-
-lazyText file s e = do
-  t <- TL.readFile file
-  TL.putStr (cut (fromIntegral s) (fromIntegral e) t)
-  where
-    cut s e = TL.unlines . map (TL.take (e - s) . TL.drop s) . TL.lines
-
-text file s e = do
-  t <- T.readFile file
-  T.putStr (cut t)
-  where
-    cut = T.unlines . map (T.take (e - s) . T.drop s) . T.lines
-
-textBS file s e = do
-  bs <- B.readFile file
-  T.putStr . cut . T.decodeUtf8 $ bs
-  where
-    cut = T.unlines . map (T.take (e - s) . T.drop s) . T.lines
-
-lazyTextBS file s e = do
-  t <- BL.readFile file
-  TL.putStr (cut (fromIntegral s) (fromIntegral e) (TL.decodeUtf8 t))
-  where
-    cut s e = TL.unlines . map (TL.take (e - s) . TL.drop s) . TL.lines
-
-main = do
-  (name : ss : es : file : _) <- getArgs
-  let [(s',"")] = readDec ss
-      [(e,"")] = readDec es
-      s = s' - 1
-  case name of
-    "bs" -> bytestring file s e
-    "lbs" -> lazyBytestring file s e
-    "ltext" -> lazyText file s e
-    "text" -> text file s e
-    "ltextBS" -> lazyTextBS file s e
-    "textBS" -> textBS file s e

File tests/benchmarks/DecodeUtf8.hs

-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.Text as T
-import qualified Data.Text.IO as T
-import qualified Data.Text.Encoding as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-import qualified Data.Text.Lazy.IO as TL
-import qualified Codec.Binary.UTF8.Generic as U8
-import Control.DeepSeq
-import System.Environment
-import System.IO
-
-strict h = do
-  bs <- B.hGetContents h
-  rnf (T.decodeUtf8 bs) `seq` return ()
-
-strict_len h = do
-  bs <- B.hGetContents h
-  print . T.length . T.decodeUtf8 $ bs
-
-strict_init_len h = do
-  bs <- B.hGetContents h
-  print . T.length . T.init . T.decodeUtf8 $ bs
-
-strict_io h = do
-  hSetEncoding h utf8
-  t <- T.hGetContents h
-  rnf t `seq` return ()
-
-strict_len_io h = do
-  hSetEncoding h utf8
-  t <- T.hGetContents h
-  print (T.length t)
-
-lazy h = do
-  bs <- BL.hGetContents h
-  rnf (TL.decodeUtf8 bs) `seq` return ()
-
-lazy_len h = do
-  bs <- BL.hGetContents h
-  print . TL.length . TL.decodeUtf8 $ bs
-
-lazy_init_len h = do
-  bs <- BL.hGetContents h
-  print . TL.length . TL.init . TL.decodeUtf8 $ bs
-
-lazy_io h = do
-  hSetEncoding h utf8
-  t <- TL.hGetContents h
-  rnf t `seq` return ()
-
-lazy_len_io h = do
-  hSetEncoding h utf8
-  t <- TL.hGetContents h
-  print (TL.length t)
-
-string h = do
-  hSetEncoding h utf8
-  t <- hGetContents h
-  rnf t `seq` return ()
-
-string_len h = do
-  hSetEncoding h utf8
-  t <- hGetContents h
-  print (length t)
-
-lazy_string_utf8 h = do
-  bs <- BL.hGetContents h
-  let t = U8.toString bs
-  rnf t `seq` return ()
-
-lazy_string_utf8_len h = do
-  bs <- BL.hGetContents h
-  let t = U8.toString bs
-  print (length t)
-
-strict_string_utf8 h = do
-  bs <- B.hGetContents h
-  let t = U8.toString bs
-  rnf t `seq` return ()
-
-strict_string_utf8_len h = do
-  bs <- B.hGetContents h
-  let t = U8.toString bs
-  print (length t)
-
-main = do
-  [kind,name] <- getArgs
-  h <- openFile name ReadMode
-  case kind of
-    "strict" -> strict h
-    "strict_len" -> strict_len h
-    "strict_init_len" -> strict_init_len h
-    "strict_io" -> strict_io h
-    "strict_len_io" -> strict_len_io h
-    "lazy" -> lazy h
-    "lazy_len" -> lazy_len h
-    "lazy_init_len" -> lazy_init_len h
-    "lazy_io" -> lazy_io h
-    "lazy_len_io" -> lazy_len_io h
-    "string" -> string h
-    "string_len" -> string_len h
-    "lazy_string_utf8" -> lazy_string_utf8 h
-    "lazy_string_utf8_len" -> lazy_string_utf8_len h
-    "strict_string_utf8" -> strict_string_utf8 h
-    "strict_string_utf8_len" -> strict_string_utf8_len h

File tests/benchmarks/EncodeUtf8.hs

-{-# LANGUAGE OverloadedStrings #-}
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.Text as T
-import qualified Data.Text.IO as T
-import qualified Data.Text.Encoding as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-import qualified Data.Text.Lazy.IO as TL
-import qualified Codec.Binary.UTF8.Generic as U8
-import System.Environment
-import System.IO
-
-strict_bytestring k s = do
-  let t = T.replicate k (T.pack s)
-  B.putStr (T.encodeUtf8 t)
-
-lazy_bytestring k s = do
-  let t = TL.replicate (fromIntegral k) (TL.pack s)
-  BL.putStr (TL.encodeUtf8 t)
-
-strict_io k s = do
-  let t = T.replicate k (T.pack s)
-  hSetEncoding stdout utf8
-  T.putStr t
-
-lazy_io k s = do
-  let t = TL.replicate (fromIntegral k) (TL.pack s)
-  hSetEncoding stdout utf8
-  TL.putStr t
-
-string k s = do
-  let t = concat $ replicate k s
-  hSetEncoding stdout utf8
-  putStr t
-
-lazy_string_utf8 k s = do
-  let t = concat $ replicate k s
-  BL.putStr (U8.fromString t)
-
-strict_string_utf8 k s = do
-  let t = concat $ replicate k s
-  B.putStr (U8.fromString t)
-
-main = do
-  [kind,str,kstr] <- getArgs
-  let k = read kstr * 1000000
-  case kind of
-    "strict" -> strict_bytestring k str
-    "lazy" -> lazy_bytestring k str
-    "strict_io" -> strict_io k str
-    "lazy_io" -> lazy_io k str
-    "string" -> string k str
-    "lazy_string_utf8" -> lazy_string_utf8 k str
-    "strict_string_utf8" -> strict_string_utf8 k str

File tests/benchmarks/Equality.hs

-import System.Environment
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy.Char8 as BL
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-
-func :: (Eq a) => [a] -> IO ()
-func ls =
-  print . sum . map (\needle -> length . filter (==needle) $ ls) $ take 100 ls
-
-bytestring haystack = func =<< B.lines `fmap` B.readFile haystack
-
-lazyBytestring haystack = func =<< BL.lines `fmap` BL.readFile haystack
-
-text haystack = func =<< (T.lines . T.decodeUtf8) `fmap` B.readFile haystack
-
-lazyText haystack = func =<<
-                    (TL.lines . TL.decodeUtf8) `fmap` BL.readFile haystack
-
-string haystack = func =<< lines `fmap` readFile haystack
-
-main = do
-  args <- getArgs
-  case args of
-    ["bs",h] -> bytestring h
-    ["lazybs",h] -> lazyBytestring h
-    ["text",h] -> text h
-    ["lazytext",h] -> lazyText h
-    ["string",h] -> string h

File tests/benchmarks/FileIndices.hs

-{-# LANGUAGE BangPatterns #-}
-
-import System.Environment (getArgs)
-import qualified Data.Text.Lazy.IO as T
-import qualified Data.Text.Lazy as T
-import qualified Data.Text.Lazy.Encoding as T
-import qualified Data.ByteString.Lazy.Char8 as B
-import qualified Data.ByteString.Char8 as BS
-import qualified Data.ByteString.Lazy.Search as B
-
-text :: FilePath -> String -> IO ()
-text file pat = T.readFile file >>= print . T.count (T.pack pat)
-
-textBS :: FilePath -> String -> IO ()
-textBS file pat = B.readFile file >>= print . T.count (T.pack pat) . T.decodeUtf8
-
-bytestring :: FilePath -> String -> IO ()
-bytestring file pat = B.readFile file >>= print . length . B.indices (BS.pack pat)
-
-main = do
-  (name : file : pat : _) <- getArgs
-  case name of
-    "bs" -> bytestring file pat
-    "text" -> text file pat
-    "textBS" -> textBS file pat

File tests/benchmarks/FileRead.hs

-{-# LANGUAGE BangPatterns #-}
-
-import System.Environment (getArgs)
-import qualified Data.Text.IO as T
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
-import qualified Data.Text.Lazy.IO as TL
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy.Char8 as BL
-import System.IO
-
-string :: Handle -> IO ()
-string h = hGetContents h >>= print . length
-
-ltext :: Handle -> IO ()
-ltext h = do
-  t <- {-# SCC "TL.hGetContents" #-} TL.hGetContents h
-  print (TL.length t)
-
-ltextBS :: Handle -> IO ()
-ltextBS h = do
-  bs <- {-# SCC "B.hGetContents" #-} BL.hGetContents h
-  print . TL.length . TL.decodeUtf8 $ bs
-
-text :: Handle -> IO ()
-text h = do
-  t <- {-# SCC "T.hGetContents" #-} T.hGetContents h
-  print (T.length t)
-
-textBS :: Handle -> IO ()
-textBS h = do
-  bs <- {-# SCC "B.hGetContents" #-} B.hGetContents h
-  print . T.length . T.decodeUtf8 $ bs
-
-lbytestring :: Handle -> IO ()
-lbytestring h = do
-  bs <- {-# SCC "BL.hGetContents" #-} BL.hGetContents h
-  print (BL.length bs)
-
-bytestring :: Handle -> IO ()
-bytestring h = do
-  bs <- {-# SCC "B.hGetContents" #-} B.hGetContents h
-  print (B.length bs)
-
-main = do
-  (name : file : _) <- getArgs
-  h <- openFile file ReadMode
-  case name of
-    "bs" -> bytestring h
-    "lbs" -> lbytestring h
-    "ltext" -> ltext h
-    "ltextBS" -> ltextBS h
-    "string" -> string h
-    "text" -> text h
-    "textBS" -> textBS h

File tests/benchmarks/FoldLines.hs

-{-# LANGUAGE BangPatterns #-}
-
-import System.Environment
-import System.IO
-import qualified Data.Text as T
-import qualified Data.Text.IO as T
-import qualified Data.ByteString as S
-
--- Text
-foldLinesT :: (a -> T.Text -> a) -> a -> Handle -> IO a
-foldLinesT f z0 h = go z0
-  where
-    go !z = do
-        eof <- hIsEOF h
-        if eof
-            then return z
-            else do
-                l <- T.hGetLine h
-                let z' = f z l in go z'
-{-# INLINE foldLinesT #-}
-
-testT :: Handle -> IO Int
-testT = foldLinesT (\n _ -> n + 1) 0
-
---ByteString
-foldLinesB :: (a -> S.ByteString -> a) -> a -> Handle -> IO a
-foldLinesB f z0 h = go z0
-  where
-    go !z = do
-        eof <- hIsEOF h
-        if eof
-            then return z
-            else do
-                l <- S.hGetLine h
-                let z' = f z l in go z'
-{-# INLINE foldLinesB #-}
-
-testB :: Handle -> IO Int
-testB = foldLinesB (\n _ -> n + 1) 0
-
-main = do
-  (name : file : _) <- getArgs
-  h <- openFile file ReadMode
-  hSetBuffering h (BlockBuffering (Just 16384))
-  case name of
-    "bs" -> testB h
-    "text" -> testT h

File tests/benchmarks/HtmlCombinator.hs

-{-# LANGUAGE BangPatterns, OverloadedStrings #-}
-import Data.Monoid (mappend, mconcat)
-import Prelude hiding (putStr)
-import Data.Text.Lazy.Builder (Builder, fromText, toLazyText)
-import Data.Text.Lazy.IO (putStr)
-
-import qualified Data.Text as T
-
-main :: IO ()
-main = do
-  putStr "Content-Type: text/html\n\n<table>"
-  putStr . toLazyText $ mconcat (replicate 20000 makeRow) 
-  putStr "</table>"
-
-makeRow :: Builder
-makeRow = mconcat (map makeCol [1..50])
-
-makeCol :: Int -> Builder
-makeCol 1 = fromText "<tr><td>1</td>"
-makeCol 50 = fromText "<td>50</td></tr>"
-makeCol i = fromText "<td>" `mappend` (textInt i `mappend` fromText "</td>")
-
-textInt :: Int -> Builder
-textInt = fromText . T.pack . show

File tests/benchmarks/Makefile

-CC := $(shell icu-config --cc)
-CFLAGS := -g $(shell icu-config --cflags)
-CPPFLAGS := $(shell icu-config --cppflags)
-LDFLAGS := $(CFLAGS) $(shell icu-config --ldflags --ldflags-icuio)
-ghc := ghc
-
-all := FileRead FileRead_prof Replace Replace_prof fileread_c
-
-all: $(all)
-
-%: %.hs
-	$(ghc) -O --make -o $@ $^
-
-%_prof: %.hs
-	$(ghc) -prof -auto-all -O --make -o $@ $^
-
-clean:
-	-rm -f *.hi *.o $(all)

File tests/benchmarks/Ordering.hs

-import System.Environment
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy.Char8 as BL
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-
-every :: Int -> [a] -> [a]
-every k = go k
-  where go n (x:xs)
-          | n < k     = go (n+1) xs
-          | otherwise = x : go 1 xs
-        go _ _        = []
-
-func :: (Ord a) => [a] -> IO ()
-func ls = print . sum . map f $ every 1000 ls
-    where f needle = length . filter ((==GT) . compare needle) $ ls
-
--- Test a comparison that could be fused: compare (toLower a) (toLower b)
-func1 ls = print . sum . map f $ every 1000 ls
-    where f needle = length . filter ((==GT) . compare (T.toLower needle) . T.toLower) $ ls
-
-bytestring haystack = func =<< B.lines `fmap` B.readFile haystack
-
-lazyBytestring haystack = func =<< BL.lines `fmap` BL.readFile haystack
-
-text haystack = func =<< (T.lines . T.decodeUtf8) `fmap` B.readFile haystack
-
-lazyText haystack = func =<<
-                    (TL.lines . TL.decodeUtf8) `fmap` BL.readFile haystack
-
-string haystack = func =<< lines `fmap` readFile haystack
-
-main = do
-  args <- getArgs
-  case args of
-    ["bs",h] -> bytestring h
-    ["lazybs",h] -> lazyBytestring h
-    ["text",h] -> text h
-    ["lazytext",h] -> lazyText h
-    ["string",h] -> string h

File tests/benchmarks/ReadNumbers.hs

-{-# LANGUAGE BangPatterns #-}
-
-import Data.List (foldl')
-import System.Environment (getArgs)
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy.Char8 as BL
-import qualified Data.ByteString.Lex.Double as B
-import qualified Data.ByteString.Lex.Lazy.Double as BL
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-import qualified Data.Text.Lazy.Read as TL
-import qualified Data.Text.Read as T
-
-readem :: (Ord a, Num a) =>
-          IO [t] -> (t -> Either String (a,t)) -> IO ()
-readem act reader = print . foldl' go 1000000 =<< act
-  where go z t = case reader t of
-                   Left err    -> error err
-                   Right (n,_) -> min n z
-    
-bytey :: (Ord a, Num a) =>
-         IO [t] -> (t -> Maybe (a,t)) -> IO ()
-bytey act reader = print . foldl' go 1000000 =<< act
-  where go z t = case reader t of
-                   Nothing    -> error "barf"
-                   Just (n,_) -> min n z
-
-main = do
-  args <- getArgs
-  let strict = (T.lines . T.decodeUtf8) `fmap` B.getContents
-      lazy = (TL.lines . TL.decodeUtf8) `fmap` BL.getContents
-      bs = B.lines `fmap` B.getContents
-      lbs = BL.lines `fmap` BL.getContents
-  case args of
-    ["dec"] -> readem strict (T.signed T.decimal :: T.Reader Int)
-    ["hex"] -> readem strict (T.signed T.hexadecimal :: T.Reader Int)
-    ["double"] -> readem strict (T.double :: T.Reader Double)
-    ["rational"] -> readem strict (T.rational :: T.Reader Double)
-    ["ldec"] -> readem lazy (TL.signed TL.decimal :: TL.Reader Int)
-    ["lhex"] -> readem lazy (TL.signed TL.hexadecimal :: TL.Reader Int)
-    ["ldouble"] -> readem lazy (TL.double :: TL.Reader Double)
-    ["lrational"] -> readem lazy (TL.rational :: TL.Reader Double)
-    ["bdec"] -> bytey bs B.readInt
-    ["bdouble"] -> bytey bs B.readDouble
-    ["bldec"] -> bytey lbs BL.readInt
-    ["bldouble"] -> bytey lbs BL.readDouble

File tests/benchmarks/Replace.hs

-{-# LANGUAGE BangPatterns #-}
-module Main (main) where
-
-import System.Environment (getArgs)
-import qualified Data.Text.Lazy as LT
-import qualified Data.Text.Lazy.IO as LT
-import qualified Data.ByteString.Lazy.Search as LB
-import qualified Data.ByteString.Lazy.Char8 as LB
-import qualified Data.ByteString.Char8 as B
-
-lazyText file pat sub =
-  LT.readFile file >>= LT.putStr . LT.replace (LT.pack pat) (LT.pack sub)
-
-lazyBS file pat sub =
-  LB.readFile file >>= LB.putStr . LB.replace (B.pack pat) (LB.pack sub)
-
-main = do
-  (kind : file : pat : sub : _) <- getArgs
-  case kind of
-    "lazyText" -> lazyText file pat sub
-    "lazyTextNull" -> LT.readFile file >>= LT.putStr
-    "lazyBS" -> lazyBS file pat sub
-    "lazyBSNull" -> LB.readFile file >>= LB.putStr

File tests/benchmarks/ReplaceTags.hs

--- Contributed by Ken Friis Larsen and Morten Ib Nielsen.
-
-{-# LANGUAGE BangPatterns #-}
-module Main (main) where
-
-import System.Environment (getArgs)
-import qualified Char
-
-import qualified Data.Text as T
-import qualified Data.Text.IO as T
-
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.IO as TL
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.Text.Lazy.Encoding as TLE
-
-
-import qualified Data.ByteString.Char8 as BC
-import qualified Data.ByteString as B
-import qualified Data.Text.Encoding as TE
-
-replaceTagsM file tag sub = 
-  BC.readFile file >>= BC.putStr . replaceTags tag sub . TE.encodeUtf8 . T.toLower . TE.decodeUtf8 
-  where 
-    replaceTags tag replacement str = B.concat $ reverse $ replaceTags' [] (BC.pack $ '<' : tag) '>' (BC.pack replacement) str
-    replaceTags' !res start end repl str =
-      let (pre, post) = BC.breakSubstring start str
-      in if BC.null post
-           then  pre : res
-           else replaceTags' (repl : pre : res) start end repl $ BC.drop 1 $
-                BC.dropWhile (/= end) post
-
-splitB sep str = seplen `seq` splitter str 
-  where 
-    splitter str = h : if B.null t then [] else splitter (B.drop seplen t)
-      where (h,t) = B.breakSubstring sep str
-    seplen = B.length sep
-    
-replaceTagsWrong file tagName sub = do
-  content <- BC.readFile file
-  let frags = map (BC.drop 1 . BC.dropWhile (/= '>')) 
-              $ splitB (BC.pack $ '<' : tagName) (BC.map Char.toLower content)
-  BC.putStr $ BC.intercalate (BC.pack sub) frags
- 
-replaceTagsK file tagName sub = do
-  raw <- BC.readFile file 
-  let content = (TE.encodeUtf8 . T.toLower . TE.decodeUtf8) raw
-  let frags = map (BC.drop 1 . BC.dropWhile (/= '>')) 
-              $ splitB (BC.pack $ '<' : tagName) content
-  BC.putStr $ BC.intercalate (BC.pack sub) frags
-
-replaceTagsO file tagName sub = do
-  raw <- BC.readFile file 
-  let content = (TE.encodeUtf8 . T.toLower . TE.decodeUtf8) raw
-  let frags = splitB (BC.pack $ '<' : tagName) content
-  BC.putStr $ BC.intercalate (BC.pack sub) frags
-  where 
-    splitB sep str = splitter str 
-      where 
-        splitter str = h : if BC.null t then [] else splitter (BC.drop 1 $ BC.dropWhile (/= '>') t)
-          where (h,t) = B.breakSubstring sep str
-
-
-    
-replaceTagsT file tagName sub = do
-  raw <- B.readFile file 
-  let content = TE.decodeUtf8 raw
-  let frags = map (T.drop 1 . T.dropWhile (/= '>')) 
-              $ T.split (T.pack $ '<' : tagName) (T.toLower content)
-  T.putStr $ T.intercalate (T.pack sub) frags
-  
-replaceTagsTL file tagName sub = do
-  raw <- BL.readFile file 
-  let content = TLE.decodeUtf8 raw
-  let frags = map (TL.drop 1 . TL.dropWhile (/= '>')) 
-              $ TL.split (TL.pack $ '<' : tagName) (TL.toLower content)
-  TL.putStr $ TL.intercalate (TL.pack sub) frags
-
-
-main = do
-  (kind : file : tag : sub : _) <- getArgs
-  case kind of
-    "Text" -> replaceTagsT file tag sub
-    "TextLazy" -> replaceTagsTL file tag sub
-    "BytestringM" -> replaceTagsM file tag sub
-    "BytestringK" -> replaceTagsK file tag sub
-    "BytestringO" -> replaceTagsO file tag sub
-    "TextNull" -> T.readFile file >>= T.putStr
-    "ByteNull" -> B.readFile file >>= B.putStr
-    "EncodeNull" -> B.readFile file >>= T.putStr . T.toLower . TE.decodeUtf8 
-

File tests/benchmarks/Setup.hs

+import Distribution.Simple
+main = defaultMain

File tests/benchmarks/StripBrackets.hs

--- From Petr Prokhorenkov.
-
-import Data.Text as T
-import Data.Text.IO as T
-
-stripBrackets :: T.Text -> T.Text
-stripBrackets text = snd $ T.mapAccumL f 0 text where
-   f depth c = let
-       depth' = depth + d' c
-       c' | depth > 0 || depth' > 0 = ' '
-          | otherwise = c
-       in
-       (depth', c')
-
-   d' '{' = 1
-   d' '[' = 1
-   d' '}' = -1
-   d' ']' = -1
-   d' _   = 0
-
-main = T.interact stripBrackets

File tests/benchmarks/benchmarks.cabal

+Name:                benchmarks
+Version:             0.1
+Synopsis:            Benchmarks for the text package
+Homepage:            http://bitbucket.org/bos/text
+License:             BSD3
+License-file:        LICENSE
+Author:              Jasper Van der Jeugt <jaspervdj@gmail.com>,
+                     Bryan O'Sullivan <bos@serpentine.com>,
+                     Tom Harper <rtomharper@googlemail.com>,
+                     Duncan Coutts <duncan@haskell.org>
+Maintainer:          jaspervdj@gmail.com
+Category:            Text
+Build-type:          Simple
+
+Cabal-version:       >=1.2
+
+Executable benchmarks
+  Hs-source-dirs: src ../..
+  Main-is:        Data/Text/Benchmarks.hs
+  Ghc-options:    -Wall -O2
+  Cpp-options:    -DHAVE_DEEPSEQ
+  Build-depends:  base              >= 4   && < 5,
+                  criterion         >= 0.5 && < 0.6,
+                  bytestring        >= 0.9 && < 0.10,
+                  deepseq           >= 1.1 && < 1.2,
+                  filepath          >= 1.2 && < 1.3,
+                  directory         >= 1.1 && < 1.2,
+                  containers        >= 0.3 && < 0.5,
+                  binary            >= 0.5 && < 0.6,
+                  utf8-string       >= 0.3 && < 0.4,
+                  blaze-builder     >= 0.3 && < 0.4,
+                  bytestring-lexing >= 0.2 && < 0.3,
+                  stringsearch      >= 0.3 && < 0.4

File tests/benchmarks/casemap.py

-#!/usr/bin/env python
-
-import sys, time
-
-def timeit(f):
-    start = time.time()
-    f()
-    end = time.time()
-    print end - start
-
-for f in sys.argv[1:]:
-    s = open(f).read()
-    u = s.decode('utf8')
-    timeit(lambda: s.upper())
-    timeit(lambda: u.upper())

File tests/benchmarks/fileread.py

-#!/usr/bin/env python
-
-import sys
-
-def string(name):
-    print len(open(name).read())
-
-def lazystring(name):
-    fp = open(name)
-    n = 0
-    d = True
-    bs = 128 * 1024
-    read = fp.read
-    while d:
-        d = len(read(bs))
-        n += d
-    print n
-
-def lazytext(name):
-    fp = open(name)
-    n = 0
-    d = True
-    bs = 128 * 1024
-    read = fp.read
-    while d:
-        s = read(bs)
-        d = len(s.decode('utf-8', 'replace'))
-        n += d
-    print n
-
-def text(name):
-    print len(open(name).read().decode('utf-8', 'replace'))
-
-if sys.argv[1] == 'bs':
-    string(sys.argv[2])
-if sys.argv[1] == 'lbs':
-    lazystring(sys.argv[2])
-elif sys.argv[1] == 'lazytext':
-    lazytext(sys.argv[2])
-elif sys.argv[1] == 'text':
-    text(sys.argv[2])

File tests/benchmarks/fileread_c.c

-#include <unicode/ustdio.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-void lazystring(const char *name)
-{
-    FILE *ufp = fopen(name, "r");
-    const size_t bufsize = sizeof(char) * 32 * 1024;
-    char *str = malloc(bufsize);
-    long len = 0;
-    int32_t n;
-
-    do {
-	n = fread(str, sizeof(char), bufsize, ufp);
-	len += n;
-    } while (n > 0);
-
-    printf("%ld\n", len);
-}
-
-void lazytext(const char *name)
-{
-    UFILE *ufp = u_fopen(name, "r", NULL, "UTF-8");
-    const size_t bufsize = sizeof(UChar) * 32 * 1024;
-    UChar *str = malloc(bufsize);
-    long len = 0;
-    int32_t n;
-
-    do {
-	n = u_file_read(str, bufsize, ufp);
-	len += n;
-    } while (n > 0);
-
-    printf("%ld\n", len);
-}
-
-void text(const char *name)
-{
-    UFILE *ufp = u_fopen(name, "r", NULL, "UTF-8");
-    FILE *fp = u_fgetfile(ufp);
-    UChar *str;
-    long fsize;
-    int32_t n;
-
-    fseek(fp, 0, SEEK_END);
-    fsize = ftell(fp);
-    u_frewind(ufp);
-
-    str = malloc(sizeof(*str) * fsize);
-
-    n = u_file_read(str, fsize, ufp);
-
-    printf("%d\n", n);
-}
-
-void string(const char *name)
-{
-    FILE *fp = fopen(name, "r");
-    char *str;
-    long fsize;
-    int32_t n;
-
-    fseek(fp, 0, SEEK_END);
-    fsize = ftell(fp);
-    fseek(fp, 0, SEEK_SET);
-
-    str = malloc(sizeof(*str) * fsize);
-
-    n = fread(str, sizeof(char), fsize, fp);
-
-    printf("%d\n", n);
-}
-
-int main(int argc, char **argv)
-{
-    if (argc != 3) {
-	fprintf(stderr, "Usage: %s handler filename\n", argv[0]);
-	exit(1);
-    }
-
-    if (strcmp(argv[1], "lazystring") == 0)
-	lazystring(argv[2]);
-    else if (strcmp(argv[1], "lazytext") == 0)
-	lazytext(argv[2]);
-    else if (strcmp(argv[1], "string") == 0)
-	string(argv[2]);
-    else if (strcmp(argv[1], "text") == 0)
-	text(argv[2]);
-    else {
-	fprintf(stderr, "no matching handler\n");
-	return 1;
-    }
-
-    return 0;
-}

File tests/benchmarks/python/.gitignore

+__pycache__
+*.pyc

File tests/benchmarks/python/case_map.py

+#!/usr/bin/env python
+
+import utils, sys
+
+for f in sys.argv[1:]:
+    t = utils.benchmark(lambda: utils.with_utf8_file(f, lambda c: c.upper()))
+    sys.stderr.write('{0}: {1}\n'.format(f, t))

File tests/benchmarks/python/file_read.py

+#!/usr/bin/env python
+
+import utils, sys
+
+for f in sys.argv[1:]:
+    t = utils.benchmark(lambda: utils.with_utf8_file(f, lambda c: len(c)))
+    sys.stderr.write('{0}: {1}\n'.format(f, t))

File tests/benchmarks/python/sort.py

+#!/usr/bin/env python
+
+import utils, sys
+
+def sort(string):
+    lines = string.splitlines()
+    lines.sort()
+    return '\n'.join(lines)
+
+for f in sys.argv[1:]:
+    t = utils.benchmark(lambda: sys.stdout.write(
+                    				utils.with_utf8_file(f,sort).encode('utf-8'))
+                    				)
+    sys.stderr.write('{0}: {1}\n'.format(f, t))
+

File tests/benchmarks/python/strip_brackets.py

+#!/usr/bin/env python
+
+import utils, sys
+
+def strip_brackets(string):
+    d = 0
+    out = ''
+    for c in string:
+        if c == '{' or c == '[': d += 1
+
+        if d > 0:
+            out += ' '
+        else:
+            out += c
+
+        if c == '}' or c == ']': d -= 1
+
+    return out
+
+for f in sys.argv[1:]:
+    t = utils.benchmark(lambda: utils.with_utf8_file(f, strip_brackets))
+    sys.stderr.write('{0}: {1}\n'.format(f, t))

File tests/benchmarks/python/utils.py

+#!/usr/bin/env python
+
+import sys, time
+
+def benchmark_once(f):
+    start = time.time()
+    f()
+    end = time.time()
+    return end - start
+
+def benchmark(f):
+    runs = 100
+    total = 0.0
+    for i in range(runs):
+        result = benchmark_once(f)
+        sys.stderr.write('Run {0}: {1}\n'.format(i, result))
+        total += result
+    return total / runs
+
+def with_utf8_file(filename, f):
+    contents = open(filename).read().decode('utf-8')
+    return f(contents)

File tests/benchmarks/python/word_count.py

+#!/usr/bin/env python
+
+import utils, sys
+
+def word_count(string):
+    freqs = {}
+    for w in string.split():
+        w = w.lower()
+        if freqs.get(w):
+            freqs[w] += 1
+        else:
+            freqs[w] = 1
+    return freqs
+
+for f in sys.argv[1:]:
+    t = utils.benchmark(lambda: utils.with_utf8_file(f, word_count))
+    sys.stderr.write('{0}: {1}\n'.format(f, t))

File tests/benchmarks/ruby/case_map.rb

+#!/usr/bin/env ruby
+
+require './utils.rb'
+
+ARGV.each do |f|
+  t = benchmark { with_utf8_file(f) { |c| c.upcase } }
+  STDERR.puts "#{f}: #{t}"
+end

File tests/benchmarks/ruby/file_read.rb

+#!/usr/bin/env ruby
+
+require './utils.rb'
+
+ARGV.each do |f|
+  t = benchmark { with_utf8_file(f) { |c| c.size } }
+  STDERR.puts "#{f}: #{t}"
+end

File tests/benchmarks/ruby/sort.rb

+#!/usr/bin/env ruby
+
+require './utils.rb'
+
+def sort(str)
+  str.lines.sort.join
+end
+
+ARGV.each do |f|
+  t = benchmark do
+    with_utf8_file(f) { |c| puts sort(c) }
+  end
+  STDERR.puts "#{f}: #{t}"
+end

File tests/benchmarks/ruby/strip_brackets.rb

+#!/usr/bin/env ruby
+
+require './utils.rb'
+
+def strip_brackets(str)
+  d = 0
+  out = ''
+
+  str.each_char do |c|
+    d += 1 if c == '{' || c == '['
+    out << if d > 0 then ' ' else c end
+    d -= 1 if c == '}' || c == ']'
+  end
+
+  out
+end
+
+ARGV.each do |f|
+  t = benchmark { with_utf8_file(f) { |c| strip_brackets(c) } }
+  STDERR.puts "#{f}: #{t}"
+end

File tests/benchmarks/ruby/utils.rb

+require 'benchmark'
+
+def benchmark(&block)
+  runs = 100
+  total = 0
+
+  runs.times do |i|
+    result = Benchmark.measure(&block).total
+    $stderr.puts "Run #{i}: #{result}"
+    total += result
+  end
+
+  total / runs 
+end
+
+def with_utf8_file(filename)
+  File.open(filename, 'r:utf-8') do |file|
+    yield file.read
+  end
+end

File tests/benchmarks/ruby/word_count.rb

+#!/usr/bin/env ruby
+
+require './utils.rb'
+
+def word_count(str)
+  freqs = Hash.new 0
+  str.split.each do |w|
+    freqs[w.downcase] += 1
+  end
+  freqs
+end
+
+ARGV.each do |f|
+  t = benchmark { with_utf8_file(f) { |c| word_count(c) } }
+  STDERR.puts "#{f}: #{t}"
+end

File tests/benchmarks/src/Data/Text/Benchmarks.hs

+-- | Main module to run the micro benchmarks
+--
+{-# LANGUAGE OverloadedStrings #-}
+module Main
+    ( main
+    ) where
+
+import Criterion.Main (Benchmark, defaultMain)
+import System.FilePath ((</>))
+import System.IO (IOMode (WriteMode), openFile, hSetEncoding, utf8)
+
+import qualified Data.Text.Benchmarks.Builder as Builder
+import qualified Data.Text.Benchmarks.CaseMap as CaseMap
+import qualified Data.Text.Benchmarks.Cut as Cut
+import qualified Data.Text.Benchmarks.DecodeUtf8 as DecodeUtf8
+import qualified Data.Text.Benchmarks.EncodeUtf8 as EncodeUtf8
+import qualified Data.Text.Benchmarks.Equality as Equality
+import qualified Data.Text.Benchmarks.FileIndices as FileIndices
+import qualified Data.Text.Benchmarks.FileRead as FileRead
+import qualified Data.Text.Benchmarks.FoldLines as FoldLines
+import qualified Data.Text.Benchmarks.HtmlCombinator as HtmlCombinator
+import qualified Data.Text.Benchmarks.Ordering as Ordering
+import qualified Data.Text.Benchmarks.Pure as Pure
+import qualified Data.Text.Benchmarks.ReadNumbers as ReadNumbers
+import qualified Data.Text.Benchmarks.Replace as Replace
+import qualified Data.Text.Benchmarks.Sort as Sort
+import qualified Data.Text.Benchmarks.StripBrackets as StripBrackets
+import qualified Data.Text.Benchmarks.WordCount as WordCount
+
+main :: IO ()
+main = benchmarks >>= defaultMain
+
+benchmarks :: IO [Benchmark]
+benchmarks = do
+    sink <- openFile "/dev/null" WriteMode
+    hSetEncoding sink utf8
+    sequence
+        [ Builder.benchmark
+        , CaseMap.benchmark (tf "russian.txt") sink
+        , Cut.benchmark (tf "russian.txt") sink 30 60
+        , DecodeUtf8.benchmark (tf "russian.txt")
+        , EncodeUtf8.benchmark sink "επανάληψη 竺法蘭共譯"
+        , Equality.benchmark (tf "japanese.txt")
+        , FileIndices.benchmark (tf "russian.txt") "принимая"
+        , FileRead.benchmark (tf "russian.txt")
+        , FoldLines.benchmark (tf "russian.txt")
+        , HtmlCombinator.benchmark sink
+        , Ordering.benchmark (tf "russian.txt")
+        , Pure.benchmark (tf "japanese.txt")
+        , ReadNumbers.benchmark (tf "numbers.txt")
+        , Replace.benchmark (tf "russian.txt") sink "принимая" "своем"
+        , Sort.benchmark (tf "russian.txt") sink
+        , StripBrackets.benchmark (tf "russian.txt") sink
+        , WordCount.benchmark (tf "russian.txt")
+        ]
+  where
+    -- Location of a test file
+    tf = ("../text/test" </>)

File tests/benchmarks/src/Data/Text/Benchmarks/Builder.hs

+-- | Testing the internal builder monoid
+--
+{-# LANGUAGE OverloadedStrings #-}
+module Data.Text.Benchmarks.Builder
+    ( benchmark
+    ) where
+
+import Criterion (Benchmark, bgroup, bench, nf)
+import Data.Binary.Builder as B
+import Data.ByteString.Char8 ()
+import Data.Monoid (mconcat)
+import qualified Blaze.ByteString.Builder as Blaze
+import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze
+import qualified Data.ByteString as SB
+import qualified Data.ByteString.Lazy as LB
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as LT
+import qualified Data.Text.Lazy.Builder as LTB
+
+benchmark :: IO Benchmark
+benchmark = return $ bgroup "Builder"
+    [ bench "LazyText" $ nf
+        (LT.length . LTB.toLazyText . mconcat . map LTB.fromText) texts
+    , bench "Binary" $ nf
+        (LB.length . B.toLazyByteString . mconcat . map B.fromByteString)
+        byteStrings
+    , bench "Blaze" $ nf
+        (LB.length . Blaze.toLazyByteString . mconcat . map Blaze.fromString)
+        strings
+    ]
+
+texts :: [T.Text]
+texts = take 200000 $ cycle ["foo", "λx", "由の"]
+{-# NOINLINE texts #-}
+
+-- Note that the non-ascii characters will be chopped
+byteStrings :: [SB.ByteString]
+byteStrings = take 200000 $ cycle ["foo", "λx", "由の"]
+{-# NOINLINE byteStrings #-}
+
+-- Note that the non-ascii characters will be chopped
+strings :: [String]
+strings = take 200000 $ cycle ["foo", "λx", "由の"]
+{-# NOINLINE strings #-}

File tests/benchmarks/src/Data/Text/Benchmarks/CaseMap.hs

+-- | This benchmark converts a number of UTF-8 encoded files to uppercase
+--
+module Data.Text.Benchmarks.CaseMap
+    ( benchmark
+    ) where
+
+import Criterion (Benchmark, bench)
+import System.IO (Handle)
+import qualified Data.ByteString as B
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+
+benchmark :: FilePath -> Handle -> IO Benchmark
+benchmark fp sink = return $ bench "CaseMap" $
+    B.readFile fp >>= B.hPutStr sink . T.encodeUtf8 . T.toUpper . T.decodeUtf8

File tests/benchmarks/src/Data/Text/Benchmarks/Cut.hs

+-- | Cut into a file, selecting certain columns (e.g. lines 10 to 40)
+--
+module Data.Text.Benchmarks.Cut
+    ( benchmark
+    ) where
+
+import Criterion (Benchmark, bgroup, bench)
+import System.IO (Handle, hPutStr)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as BC
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Char8 as BLC
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import qualified Data.Text.IO as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TL
+import qualified Data.Text.Lazy.IO as TL
+
+benchmark :: FilePath -> Handle -> Int -> Int -> IO Benchmark
+benchmark p sink from to = return $ bgroup "Cut"
+    [ bench' "String" string
+    , bench' "ByteString" byteString
+    , bench' "LazyByteString" lazyByteString
+    , bench' "Text" text
+    , bench' "LazyText" lazyText
+    , bench' "TextByteString" textByteString
+    , bench' "LazyTextByteString" lazyTextByteString
+    ]
+  where
+    bench' n s = bench n (s p sink from to)
+
+string :: FilePath -> Handle -> Int -> Int -> IO ()
+string fp sink from to = do
+    s <- readFile fp
+    hPutStr sink $ cut s
+  where
+    cut = unlines . map (take (to - from) . drop from) . lines
+
+byteString :: FilePath -> Handle -> Int -> Int -> IO ()
+byteString fp sink from to = do
+    bs <- B.readFile fp
+    B.hPutStr sink $ cut bs
+  where
+    cut = BC.unlines . map (B.take (to - from) . B.drop from) . BC.lines
+
+lazyByteString :: FilePath -> Handle -> Int -> Int -> IO ()
+lazyByteString fp sink from to = do
+    bs <- BL.readFile fp
+    BL.hPutStr sink $ cut bs
+  where
+    cut = BLC.unlines . map (BL.take (to' - from') . BL.drop from') . BLC.lines
+    from' = fromIntegral from
+    to' = fromIntegral to
+
+text :: FilePath -> Handle -> Int -> Int -> IO ()
+text fp sink from to = do
+    t <- T.readFile fp
+    T.hPutStr sink $ cut t
+  where
+    cut = T.unlines . map (T.take (to - from) . T.drop from) . T.lines
+
+lazyText :: FilePath -> Handle -> Int -> Int -> IO ()
+lazyText fp sink from to = do
+    t <- TL.readFile fp
+    TL.hPutStr sink $ cut t
+  where
+    cut = TL.unlines . map (TL.take (to' - from') . TL.drop from') . TL.lines
+    from' = fromIntegral from
+    to' = fromIntegral to
+
+textByteString :: FilePath -> Handle -> Int -> Int -> IO ()
+textByteString fp sink from to = do
+    t <- T.decodeUtf8 `fmap` B.readFile fp
+    B.hPutStr sink $ T.encodeUtf8 $ cut t
+  where
+    cut = T.unlines . map (T.take (to - from) . T.drop from) . T.lines
+
+lazyTextByteString :: FilePath -> Handle -> Int -> Int -> IO ()
+lazyTextByteString fp sink from to = do
+    t <- TL.decodeUtf8 `fmap` BL.readFile fp
+    BL.hPutStr sink $ TL.encodeUtf8 $ cut t
+  where
+    cut = TL.unlines . map (TL.take (to' - from') . TL.drop from') . TL.lines
+    from' = fromIntegral from
+    to' = fromIntegral to

File tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs

+module Data.Text.Benchmarks.DecodeUtf8
+    ( benchmark
+    ) where
+
+import Control.DeepSeq (rnf)
+import Criterion (Benchmark, bgroup, bench)
+import System.IO (IOMode (ReadMode), openFile, hGetContents, hSetEncoding, utf8)
+import qualified Codec.Binary.UTF8.Generic as U8
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import qualified Data.Text.IO as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TL
+import qualified Data.Text.Lazy.IO as TL
+
+benchmark :: FilePath -> IO Benchmark
+benchmark fp = return $ bgroup "DecodeUtf8"
+    [ bench "Strict" $ do
+        bs <- B.readFile fp
+        rnf (T.decodeUtf8 bs) `seq` return ()
+
+    , bench "StrictLength" $ do
+        bs <- B.readFile fp
+        rnf (T.length $ T.decodeUtf8 bs) `seq` return ()
+
+    , bench "StrictInitLength" $ do
+        bs <- B.readFile fp
+        rnf (T.length $ T.init $ T.decodeUtf8 bs) `seq` return ()
+
+    , bench "StrictIO" $ do
+        h <- openFile fp ReadMode
+        hSetEncoding h utf8
+        t <- T.hGetContents h
+        rnf t `seq` return ()
+
+    , bench "StrictLengthIO" $ do
+        h <- openFile fp ReadMode
+        hSetEncoding h utf8
+        t <- T.hGetContents h
+        rnf (T.length t) `seq` return ()
+
+    , bench "Lazy" $ do
+        bs <- BL.readFile fp
+        rnf (TL.decodeUtf8 bs) `seq` return ()
+
+    , bench "LazyLength" $ do
+        bs <- BL.readFile fp
+        rnf (TL.length $ TL.decodeUtf8 bs) `seq` return ()
+
+    , bench "LazyInitLength" $ do
+        bs <- BL.readFile fp
+        rnf (TL.length $ TL.init $ TL.decodeUtf8 bs) `seq` return ()
+
+    , bench "LazyIO" $ do
+        h <- openFile fp ReadMode
+        hSetEncoding h utf8
+        t <- TL.hGetContents h
+        rnf t `seq` return ()
+
+    , bench "LazyLengthIO" $ do
+        h <- openFile fp ReadMode
+        hSetEncoding h utf8
+        t <- TL.hGetContents h
+        rnf (TL.length t) `seq` return ()
+
+    , bench "String" $ do
+        h <- openFile fp ReadMode
+        hSetEncoding h utf8
+        t <- hGetContents h
+        rnf t `seq` return ()
+
+    , bench "StringLength" $ do
+        h <- openFile fp ReadMode
+        hSetEncoding h utf8
+        t <- hGetContents h
+        rnf (length t) `seq` return ()
+
+    , bench "LazyStringUtf8" $ do
+        s <- U8.toString `fmap` BL.readFile fp
+        rnf s `seq` return ()
+
+    , bench "LazyStringUtf8Length" $ do
+        s <- U8.toString `fmap` BL.readFile fp