Commits

wren romano committed e6c38ef

added IsSpace.hs benchmark (forgot it in the prev patch)

  • Participants
  • Parent commits f196b11

Comments (0)

Files changed (1)

benchmarks/IsSpace.hs

+{-# OPTIONS_GHC -Wall -fwarn-tabs #-}
+{-# LANGUAGE ForeignFunctionInterface #-}
+----------------------------------------------------------------
+--                                                    2010.10.09
+-- |
+-- Module      :  IsSpace
+-- Copyright   :  Copyright (c) 2010 wren ng thornton
+-- License     :  BSD
+-- Maintainer  :  wren@community.haskell.org
+-- Stability   :  experimental
+-- Portability :  portable (FFI)
+--
+-- A benchmark for comparing different definitions of predicates
+-- for detecting whitespace. As of the last run the results are:
+-- 
+-- * Data.Char.isSpace             : 14.44786 us +/- 258.0377 ns
+-- * isSpace_DataChar              : 43.25154 us +/- 655.7037 ns
+-- * isSpace_Char                  : 29.26598 us +/- 454.1445 ns
+-- * isPerlSpace                   :
+-- * Data.Attoparsec.Char8.isSpace : 81.87335 us +/- 1.195903 us
+-- * isSpace_Char8                 : 11.84677 us +/- 178.9795 ns
+-- * isSpace_w8                    : 11.55470 us +/- 133.7644 ns
+----------------------------------------------------------------
+module IsSpace (main) where
+
+import qualified Data.Char             as C
+import           Data.Word             (Word8)
+import qualified Data.ByteString       as B
+import qualified Data.ByteString.Char8 as B8
+import           Foreign.C.Types       (CInt)
+
+import           Criterion             (bench, nf)
+import           Criterion.Main        (defaultMain)
+
+----------------------------------------------------------------
+----- Character predicates
+-- N.B. \x9..\xD == "\t\n\v\f\r"
+
+-- | Recognize the same characters as Perl's @/\s/@ in Unicode mode.
+-- In particular, we recognize POSIX 1003.2 @[[:space:]]@ except
+-- @\'\v\'@, and recognize the Unicode @\'\x85\'@, @\'\x2028\'@,
+-- @\'\x2029\'@. Notably, @\'\x85\'@ belongs to Latin-1 (but not
+-- ASCII) and therefore does not belong to POSIX 1003.2 @[[:space:]]@
+-- (nor non-Unicode @/\s/@).
+isPerlSpace :: Char -> Bool
+isPerlSpace c
+    =  (' '      == c)
+    || ('\t' <= c && c <= '\r' && c /= '\v')
+    || ('\x85'   == c)
+    || ('\x2028' == c)
+    || ('\x2029' == c)
+{-# INLINE isPerlSpace #-}
+
+
+-- | 'Data.Attoparsec.Char8.isSpace', duplicated here because it's
+-- not exported. This is the definition as of attoparsec-0.8.1.0.
+isSpace :: Char -> Bool
+isSpace c = c `B8.elem` spaces
+    where
+    spaces = B8.pack " \n\r\t\v\f"
+    {-# NOINLINE spaces #-}
+{-# INLINE isSpace #-}
+
+
+-- | An alternate version of 'Data.Attoparsec.Char8.isSpace'.
+isSpace_Char8 :: Char -> Bool
+isSpace_Char8 c =  (' ' == c) || ('\t' <= c && c <= '\r')
+{-# INLINE isSpace_Char8 #-}
+
+
+-- | An alternate version of 'Data.Char.isSpace'. This uses the
+-- same trick as 'isSpace_Char8' but we include Unicode whitespaces
+-- too, in order to have the same results as 'Data.Char.isSpace'
+-- (whereas 'isSpace_Char8' doesn't recognize Unicode whitespace).
+isSpace_Char :: Char -> Bool
+isSpace_Char c
+    =  (' '    == c)
+    || ('\t' <= c && c <= '\r')
+    || ('\xA0' == c)
+    || (iswspace (fromIntegral (C.ord c)) /= 0)
+{-# INLINE isSpace_Char #-}
+
+foreign import ccall unsafe "u_iswspace"
+    iswspace :: CInt -> CInt
+
+-- | Verbatim version of 'Data.Char.isSpace' (i.e., 'GHC.Unicode.isSpace'
+-- as of base-4.2.0.2) in order to try to figure out why 'isSpace_Char'
+-- is slower than 'Data.Char.isSpace'. It appears to be something
+-- special in how the base library was compiled.
+isSpace_DataChar :: Char -> Bool
+isSpace_DataChar c =
+    c == ' '     ||
+    c == '\t'    ||
+    c == '\n'    ||
+    c == '\r'    ||
+    c == '\f'    ||
+    c == '\v'    ||
+    c == '\xa0'  ||
+    iswspace (fromIntegral (C.ord c)) /= 0
+{-# INLINE isSpace_DataChar #-}
+
+
+-- | A 'Word8' version of 'Data.Attoparsec.Char8.isSpace'.
+isSpace_w8 :: Word8 -> Bool
+isSpace_w8 w = (w == 32) || (9 <= w && w <= 13)
+{-# INLINE isSpace_w8 #-}
+
+----------------------------------------------------------------
+
+main :: IO ()
+main = defaultMain
+    [ bench "Data.Char.isSpace" $ nf (map C.isSpace)        ['\x0'..'\255']
+    , bench "isSpace_DataChar"  $ nf (map isSpace_DataChar) ['\x0'..'\255']
+    , bench "isSpace_Char"      $ nf (map isSpace_Char)     ['\x0'..'\255']
+    , bench "isPerlSpace"       $ nf (map isPerlSpace)      ['\x0'..'\255']
+    , bench "Data.Attoparsec.Char8.isSpace"
+                                $ nf (map isSpace)          ['\x0'..'\255']
+    , bench "isSpace_Char8"     $ nf (map isSpace_Char8)    ['\x0'..'\255']
+    , bench "isSpace_w8"        $ nf (map isSpace_w8)       [0..255]
+    ]
+
+----------------------------------------------------------------
+----------------------------------------------------------- fin.