basvandijk avatar basvandijk committed 963ab98

Improve performance of numeric parsers

by parsing to Scientific numbers

Comments (0)

Files changed (7)

 *.hi
 *.o
 /dist/
+.cabal-sandbox
+cabal.sandbox.config

Data/Attoparsec/ByteString/Char8.hs

     , I.atEnd
     ) where
 
-import Control.Applicative ((*>), (<*), (<$>), (<|>))
+import Control.Applicative (pure, (*>), (<*), (<$>), (<|>))
 import Data.Attoparsec.ByteString.FastSet (charClass, memberChar)
 import Data.Attoparsec.ByteString.Internal (Parser, (<?>))
 import Data.Attoparsec.Combinator
 import Data.Bits (Bits, (.|.), shiftL)
 import Data.ByteString.Internal (c2w, w2c)
 import Data.Int (Int8, Int16, Int32, Int64)
-import Data.Ratio ((%))
 import Data.String (IsString(..))
+import Data.Scientific (Scientific, scientific, coefficient, base10Exponent)
 import Data.Word (Word8, Word16, Word32, Word64, Word)
 import Prelude hiding (takeWhile)
 import qualified Data.Attoparsec.ByteString as A
 {-# SPECIALIZE rational :: Parser Double #-}
 {-# SPECIALIZE rational :: Parser Float #-}
 {-# SPECIALIZE rational :: Parser Rational #-}
-rational = floaty $ \real frac fracDenom -> fromRational $
-                     real % 1 + frac % fracDenom
+{-# SPECIALIZE rational :: Parser Scientific #-}
+rational = scientifically realToFrac
 
 -- | Parse a rational number.
 --
 -- This function does not accept string representations of \"NaN\" or
 -- \"Infinity\".
 double :: Parser Double
-double = floaty asDouble
-
-asDouble :: Integer -> Integer -> Integer -> Double
-asDouble real frac fracDenom =
-    fromIntegral real + fromIntegral frac / fromIntegral fracDenom
-{-# INLINE asDouble #-}
+double = rational
 
 -- | Parse a number, attempting to preserve both speed and precision.
 --
 -- 'rational'.
 --
 -- This function does not accept string representations of \"NaN\" or
--- \"Infinity\".
+-- \"
 number :: Parser Number
-number = floaty $ \real frac fracDenom ->
-         if frac == 0 && fracDenom == 0
-         then I real
-         else D (asDouble real frac fracDenom)
-{-# INLINE number #-}
+number = scientifically $ \s ->
+            let e = base10Exponent s
+                c = coefficient s
+            in if e >= 0
+               then I (c * 10 ^ e)
+               else D (fromInteger c / 10 ^ negate e)
 
-data T = T !Integer !Int
-
-floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a
-{-# INLINE floaty #-}
-floaty f = do
+{-# INLINE scientifically #-}
+scientifically :: (Scientific -> a) -> Parser a
+scientifically h = do
   let minus = 45
       plus  = 43
   !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|>
-               return True
-  real <- decimal
-  let tryFraction = do
-        let dot = 46
-        _ <- I.satisfy (==dot)
-        ds <- I.takeWhile isDigit_w8
-        case I.parseOnly decimal ds of
-                Right n -> return $ T n (B.length ds)
-                _       -> fail "no digits after decimal"
-  T fraction fracDigits <- tryFraction <|> return (T 0 0)
+               pure True
+
+  n <- decimal
+
+  let f fracDigits = scientific (B8.foldl' step n fracDigits)
+                                (negate $ B8.length fracDigits)
+      step a w = a * 10 + fromIntegral (w - 48)
+
+  s <- let dot = 46 in
+       (I.satisfy (==dot) *> (f <$> I.takeWhile isDigit_w8)) <|>
+         pure (scientific n 0)
+
+  let !signedCoeff | positive  =          coefficient s
+                   | otherwise = negate $ coefficient s
+
   let littleE = 101
       bigE    = 69
-      e w = w == littleE || w == bigE
-  power <- (I.satisfy e *> signed decimal) <|> return (0::Int)
-  let n = if fracDigits == 0
-          then if power == 0
-               then fromIntegral real
-               else fromIntegral real * (10 ^^ power)
-          else if power == 0
-               then f real fraction (10 ^ fracDigits)
-               else f real fraction (10 ^ fracDigits) * (10 ^^ power)
-  return $ if positive
-           then n
-           else -n
+  (I.satisfy (\c -> c == littleE || c == bigE) *>
+      fmap (h . scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
+    return (h $ scientific signedCoeff   (base10Exponent s))

Data/Attoparsec/Number.hs

 --
 -- A simple number type, useful for parsing both exact and inexact
 -- quantities without losing much precision.
-module Data.Attoparsec.Number
-    (
-      Number(..)
-    ) where
+module Data.Attoparsec.Number ( Number(..) ) where
 
 import Control.DeepSeq (NFData(rnf))
 import Data.Data (Data)

Data/Attoparsec/Text.hs

     , I.atEnd
     ) where
 
-import Control.Applicative ((<$>), (*>), (<*), (<|>))
+import Control.Applicative (pure, (<$>), (*>), (<*), (<|>))
 import Data.Attoparsec.Combinator
 import Data.Attoparsec.Number (Number(..))
+import Data.Scientific (Scientific, scientific, coefficient, base10Exponent)
 import Data.Attoparsec.Text.Internal ((<?>), Parser, Result, parse, takeWhile1)
 import Data.Bits (Bits, (.|.), shiftL)
 import Data.Char (isAlpha, isDigit, isSpace, ord)
 import Data.Int (Int8, Int16, Int32, Int64)
-import Data.Ratio ((%))
 import Data.Text (Text)
 import Data.Word (Word8, Word16, Word32, Word64, Word)
 import qualified Data.Attoparsec.Internal as I
 {-# SPECIALIZE rational :: Parser Double #-}
 {-# SPECIALIZE rational :: Parser Float #-}
 {-# SPECIALIZE rational :: Parser Rational #-}
-rational = floaty $ \real frac fracDenom -> fromRational $
-                     real % 1 + frac % fracDenom
+{-# SPECIALIZE rational :: Parser Scientific #-}
+rational = scientifically realToFrac
 
 -- | Parse a rational number.
 --
 -- This function does not accept string representations of \"NaN\" or
 -- \"Infinity\".
 double :: Parser Double
-double = floaty asDouble
-
-asDouble :: Integer -> Integer -> Integer -> Double
-asDouble real frac fracDenom =
-    fromIntegral real + fromIntegral frac / fromIntegral fracDenom
-{-# INLINE asDouble #-}
+double = rational
 
 -- | Parse a number, attempting to preserve both speed and precision.
 --
 -- This function does not accept string representations of \"NaN\" or
 -- \"Infinity\".
 number :: Parser Number
-number = floaty $ \real frac fracDenom ->
-         if frac == 0 && fracDenom == 0
-         then I real
-         else D (asDouble real frac fracDenom)
-{-# INLINE number #-}
+number = scientifically $ \s ->
+            let e = base10Exponent s
+                c = coefficient s
+            in if e >= 0
+               then I (c * 10 ^ e)
+               else D (fromInteger c / 10 ^ negate e)
+
+{-# INLINE scientifically #-}
+scientifically :: (Scientific -> a) -> Parser a
+scientifically h = do
+  !positive <- ((== '+') <$> I.satisfy (\c -> c == '-' || c == '+')) <|>
+               pure True
+
+  n <- decimal
+
+  let f fracDigits = scientific (T.foldl' step n fracDigits)
+                                (negate $ T.length fracDigits)
+      step a c = a * 10 + fromIntegral (ord c - 48)
+
+  s <- (I.satisfy (=='.') *> (f <$> I.takeWhile isDigit)) <|>
+         pure (scientific n 0)
+
+  let !signedCoeff | positive  =          coefficient s
+                   | otherwise = negate $ coefficient s
+
+  (I.satisfy (\c -> c == 'e' || c == 'E') *>
+      fmap (h . scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
+    return (h $ scientific signedCoeff   (base10Exponent s))
 
 -- | Parse a single digit, as recognised by 'isDigit'.
 digit :: Parser Char
 -- | Type-specialized version of '<*' for 'Text'.
 (<*.) :: Parser a -> Text -> Parser a
 f <*. s = f <* I.string s
-
-data T = T !Integer !Int
-
-floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a
-{-# INLINE floaty #-}
-floaty f = do
-  !positive <- ((== '+') <$> I.satisfy (\c -> c == '-' || c == '+')) <|>
-               return True
-  real <- decimal
-  let tryFraction = do
-        _ <- I.satisfy (=='.')
-        ds <- I.takeWhile isDigit
-        case I.parseOnly decimal ds of
-                Right n -> return $ T n (T.length ds)
-                _       -> fail "no digits after decimal"
-  T fraction fracDigits <- tryFraction <|> return (T 0 0)
-  let e c = c == 'e' || c == 'E'
-  power <- (I.satisfy e *> signed decimal) <|> return (0::Int)
-  let n = if fracDigits == 0
-          then if power == 0
-               then fromIntegral real
-               else fromIntegral real * (10 ^^ power)
-          else if power == 0
-               then f real fraction (10 ^ fracDigits)
-               else f real fraction (10 ^ fracDigits) * (10 ^^ power)
-  return $ if positive
-           then n
-           else -n
                  bytestring,
                  containers,
                  deepseq,
-                 text >= 0.11.1.5
+                 text >= 0.11.1.5,
+                 scientific >= 0.0 && < 0.1
 
   exposed-modules: Data.Attoparsec
                    Data.Attoparsec.ByteString

benchmarks/Benchmarks.hs

        bench "short" $ nf (AB.parse quotedString) (BC.pack "abcdefghijk\"")
      , bench "long" $ nf (AB.parse quotedString) b
      ]
+
+   , let strN     = "1234.56789"
+         strNePos = "1234.56789e3"
+         strNeNeg = "1234.56789e-3"
+     in
+     bgroup "numbers"
+     [ let !tN     = T.pack strN
+           !tNePos = T.pack strNePos
+           !tNeNeg = T.pack strNeNeg
+       in bgroup "Text"
+       [
+         bgroup "no power"
+         [ bench "double"     $ nf (AT.parseOnly AT.double)                            tN
+         , bench "number"     $ nf (AT.parseOnly AT.number)                            tN
+         , bench "rational"   $ nf (AT.parseOnly (AT.rational :: AT.Parser Rational))  tN
+         , bench "scientific" $ nf (AT.parseOnly (AT.rational :: AT.Parser Scientifc)) tN
+         ]
+       , bgroup "positive power"
+         [ bench "double"     $ nf (AT.parseOnly AT.double)                            tNePos
+         , bench "number"     $ nf (AT.parseOnly AT.number)                            tNePos
+         , bench "rational"   $ nf (AT.parseOnly (AT.rational :: AT.Parser Rational))  tNePos
+         , bench "scientific" $ nf (AT.parseOnly (AT.rational :: AT.Parser Scientifc)) tNePos
+         ]
+       , bgroup "negative power"
+         [ bench "double"     $ nf (AT.parseOnly AT.double)                            tNeNeg
+         , bench "number"     $ nf (AT.parseOnly AT.number)                            tNeNeg
+         , bench "rational"   $ nf (AT.parseOnly (AT.rational :: AT.Parser Rational))  tNeNeg
+         , bench "scientific" $ nf (AT.parseOnly (AT.rational :: AT.Parser Scientifc)) tNeNeg
+         ]
+       ]
+     , let !bN     = BC.pack strN
+           !bNePos = BC.pack strNePos
+           !bNeNeg = BC.pack strNeNeg
+       in bgroup "ByteString"
+       [ bgroup "no power"
+         [ bench "double"     $ nf (AC.parseOnly AC.double)                             bN
+         , bench "number"     $ nf (AC.parseOnly AC.number)                             bN
+         , bench "rational"   $ nf (AC.parseOnly (AC.rational :: AC.Parser Rational))   bN
+         , bench "scientific" $ nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bN
+         ]
+       , bgroup "positive power"
+         [ bench "double"     $ nf (AC.parseOnly AC.double)                             bNePos
+         , bench "number"     $ nf (AC.parseOnly AC.number)                             bNePos
+         , bench "rational"   $ nf (AC.parseOnly (AC.rational :: AC.Parser Rational))   bNePos
+         , bench "scientific" $ nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNePos
+         ]
+       , bgroup "negative power"
+         [ bench "double"     $ nf (AC.parseOnly AC.double)                             bNeNeg
+         , bench "number"     $ nf (AC.parseOnly AC.number)                             bNeNeg
+         , bench "rational"   $ nf (AC.parseOnly (AC.rational :: AC.Parser Rational))   bNeNeg
+         , bench "scientific" $ nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNeNeg
+         ]
+       ]
+     ]
    ]
 
 -- Benchmarks bind and (potential) bounds-check merging.

benchmarks/attoparsec-benchmarks.cabal

     base,
     bytestring,
     criterion >= 0.5,
-    deepseq == 1.1.*,
+    deepseq >= 1.1,
     parsec >= 3.1.2,
     text
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.