Commits

Mario Blažević committed a0d6a35

Integrating the benchmarks and their dependencies.

Comments (0)

Files changed (13)

 ^(?:\.cabal-sandbox|benchmarks/\.cabal-sandbox|cabal-dev|dist)$
 \.(?:aux|eventlog|h[ip]|hs.html|log|[mt]ix|[oa]|orig|prof|ps|rej|swp)$
+^tmp/
 ~$
 benchmarks/Arse
 benchmarks/dist

Data/Picoparsec/ByteString/Char8.hs

     , I.endOfLine
     , isEndOfLine
     , isHorizontalSpace
-
-    -- * Numeric parsers
-    , decimal
-    , hexadecimal
-    , signed
-    , double
-    , Number(..)
-    , number
-    , rational
-    , scientific
     ) where
 
-import Control.Applicative (pure, (*>), (<*), (<$>), (<|>))
+import Control.Applicative ((*>), (<*))
 import Data.Picoparsec.ByteString.FastSet (charClass, memberChar)
 import Data.Picoparsec.ByteString.Internal (Parser)
 import Data.Picoparsec.Combinator
-import Data.Picoparsec.Number (Number(..))
-import Data.Bits (Bits, (.|.), shiftL)
 import Data.ByteString.Internal (c2w, w2c)
-import Data.Int (Int8, Int16, Int32, Int64)
-import Data.Scientific (Scientific, coefficient, base10Exponent)
-import qualified Data.Scientific as Sci (scientific)
-import Data.Word (Word8, Word16, Word32, Word64, Word)
+import Data.Word (Word8)
 import Prelude hiding (takeWhile)
 import qualified Data.Picoparsec.ByteString.Internal as I
 import qualified Data.ByteString as B8
 isHorizontalSpace :: Word8 -> Bool
 isHorizontalSpace w = w == 32 || w == 9
 {-# INLINE isHorizontalSpace #-}
-
--- | Parse and decode an unsigned hexadecimal number.  The hex digits
--- @\'a\'@ through @\'f\'@ may be upper or lower case.
---
--- This parser does not accept a leading @\"0x\"@ string.
-hexadecimal :: (Integral a, Bits a) => Parser a
-hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit
-  where
-    isHexDigit w = (w >= 48 && w <= 57) ||
-                   (w >= 97 && w <= 102) ||
-                   (w >= 65 && w <= 70)
-    step a w | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48)
-             | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87)
-             | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55)
-{-# SPECIALISE hexadecimal :: Parser Int #-}
-{-# SPECIALISE hexadecimal :: Parser Int8 #-}
-{-# SPECIALISE hexadecimal :: Parser Int16 #-}
-{-# SPECIALISE hexadecimal :: Parser Int32 #-}
-{-# SPECIALISE hexadecimal :: Parser Int64 #-}
-{-# SPECIALISE hexadecimal :: Parser Integer #-}
-{-# SPECIALISE hexadecimal :: Parser Word #-}
-{-# SPECIALISE hexadecimal :: Parser Word8 #-}
-{-# SPECIALISE hexadecimal :: Parser Word16 #-}
-{-# SPECIALISE hexadecimal :: Parser Word32 #-}
-{-# SPECIALISE hexadecimal :: Parser Word64 #-}
-
--- | Parse and decode an unsigned decimal number.
-decimal :: Integral a => Parser a
-decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig
-  where isDig w  = w >= 48 && w <= 57
-        step a w = a * 10 + fromIntegral (w - 48)
-{-# SPECIALISE decimal :: Parser Int #-}
-{-# SPECIALISE decimal :: Parser Int8 #-}
-{-# SPECIALISE decimal :: Parser Int16 #-}
-{-# SPECIALISE decimal :: Parser Int32 #-}
-{-# SPECIALISE decimal :: Parser Int64 #-}
-{-# SPECIALISE decimal :: Parser Integer #-}
-{-# SPECIALISE decimal :: Parser Word #-}
-{-# SPECIALISE decimal :: Parser Word8 #-}
-{-# SPECIALISE decimal :: Parser Word16 #-}
-{-# SPECIALISE decimal :: Parser Word32 #-}
-{-# SPECIALISE decimal :: Parser Word64 #-}
-
--- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
--- character.
-signed :: Num a => Parser a -> Parser a
-{-# SPECIALISE signed :: Parser Int -> Parser Int #-}
-{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-}
-{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-}
-{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-}
-{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-}
-{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-}
-signed p = (negate <$> (char8 '-' *> p))
-       <|> (char8 '+' *> p)
-       <|> p
-
--- | Parse a rational number.
---
--- This parser accepts an optional leading sign character, followed by
--- at least one decimal digit.  The syntax similar to that accepted by
--- the 'read' function, with the exception that a trailing @\'.\'@ or
--- @\'e\'@ /not/ followed by a number is not consumed.
---
--- Examples with behaviour identical to 'read', if you feed an empty
--- continuation to the first result:
---
--- >rational "3"     == Done 3.0 ""
--- >rational "3.1"   == Done 3.1 ""
--- >rational "3e4"   == Done 30000.0 ""
--- >rational "3.1e4" == Done 31000.0, ""
---
--- Examples with behaviour identical to 'read':
---
--- >rational ".3"    == Fail "input does not start with a digit"
--- >rational "e3"    == Fail "input does not start with a digit"
---
--- Examples of differences from 'read':
---
--- >rational "3.foo" == Done 3.0 ".foo"
--- >rational "3e"    == Done 3.0 "e"
---
--- This function does not accept string representations of \"NaN\" or
--- \"Infinity\".
-rational :: Fractional a => Parser a
-{-# SPECIALIZE rational :: Parser Double #-}
-{-# SPECIALIZE rational :: Parser Float #-}
-{-# SPECIALIZE rational :: Parser Rational #-}
-{-# SPECIALIZE rational :: Parser Scientific #-}
-rational = scientifically realToFrac
-
--- | Parse a rational number.
---
--- The syntax accepted by this parser is the same as for 'rational'.
---
--- /Note/: This function is almost ten times faster than 'rational',
--- but is slightly less accurate.
---
--- The 'Double' type supports about 16 decimal places of accuracy.
--- For 94.2% of numbers, this function and 'rational' give identical
--- results, but for the remaining 5.8%, this function loses precision
--- around the 15th decimal place.  For 0.001% of numbers, this
--- function will lose precision at the 13th or 14th decimal place.
---
--- This function does not accept string representations of \"NaN\" or
--- \"Infinity\".
-double :: Parser Double
-double = rational
-
--- | Parse a number, attempting to preserve both speed and precision.
---
--- The syntax accepted by this parser is the same as for 'rational'.
---
--- /Note/: This function is almost ten times faster than 'rational'.
--- On integral inputs, it gives perfectly accurate answers, and on
--- floating point inputs, it is slightly less accurate than
--- 'rational'.
---
--- This function does not accept string representations of \"NaN\" or
--- \"
-number :: Parser Number
-number = scientifically $ \s ->
-            let e = base10Exponent s
-                c = coefficient s
-            in if e >= 0
-               then I (c * 10 ^ e)
-               else D (fromInteger c / 10 ^ negate e)
-
--- | Parse a scientific number.
---
--- The syntax accepted by this parser is the same as for 'rational'.
-scientific :: Parser Scientific
-scientific = scientifically id
-
-{-# INLINE scientifically #-}
-scientifically :: (Scientific -> a) -> Parser a
-scientifically h = do
-  let minus = 45
-      plus  = 43
-  !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|>
-               pure True
-
-  n <- decimal
-
-  let f fracDigits = Sci.scientific (B8.foldl' step n fracDigits)
-                                    (negate $ B8.length fracDigits)
-      step a w = a * 10 + fromIntegral (w - 48)
-
-  s <- let dot = 46 in
-       (I.satisfy (==dot) *> (f <$> I.takeWhile isDigit_w8)) <|>
-         pure (Sci.scientific n 0)
-
-  let !signedCoeff | positive  =          coefficient s
-                   | otherwise = negate $ coefficient s
-
-  let littleE = 101
-      bigE    = 69
-  (I.satisfy (\c -> c == littleE || c == bigE) *>
-      fmap (h . Sci.scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
-    return (h $ Sci.scientific signedCoeff   (base10Exponent s))

Data/Picoparsec/Combinator.hs

     , atEnd
     ) where
 
+import Prelude hiding (null)
+
 import Control.Applicative (Alternative(..), Applicative(..), empty, liftA2,
                             (<|>), (*>), (<$>))
 import Control.Monad (MonadPlus(..))
 import Control.Applicative (many)
 #endif
 
+import Data.Monoid.Null (MonoidNull(null))
 import Data.Picoparsec.Internal (demandInput, ensure, put, wantInput)
 import Data.Picoparsec.Internal.Types (Chunk(..), Input(..), Parser(..), addS)
 import Data.Picoparsec.Internal.Types (More(..))
 --
 -- >digit = satisfyElem isDigit
 -- >    where isDigit c = c >= '0' && c <= '9'
-satisfyElem :: Chunk t => (ChunkElem t -> Bool) -> Parser t (ChunkElem t)
+satisfyElem :: (MonoidNull t, Chunk t) => (ChunkElem t -> Bool) -> Parser t (ChunkElem t)
 satisfyElem p = do
   c <- ensure 1
   let !h = unsafeChunkHead c
 {-# INLINE satisfyElem #-}
 
 -- | Match only if all input has been consumed.
-endOfInput :: Chunk t => Parser t ()
+endOfInput :: MonoidNull t => Parser t ()
 endOfInput = Parser $ \i0 a0 m0 kf ks ->
-             if nullChunk (unI i0)
+             if null (unI i0)
              then if m0 == Complete
                   then ks i0 a0 m0 ()
                   else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
 
 -- | Return an indication of whether the end of input has been
 -- reached.
-atEnd :: Chunk t => Parser t Bool
+atEnd :: MonoidNull t => Parser t Bool
 atEnd = not <$> wantInput
 {-# INLINE atEnd #-}

Data/Picoparsec/Internal.hs

     , wantInput
     ) where
 
+import Prelude hiding (null)
+
 import Data.Picoparsec.Internal.Types
 import Data.ByteString (ByteString)
+import Data.Monoid.Null (MonoidNull(null))
 import Data.Text (Text)
 
 -- | Compare two 'IResult' values for equality.
 put c = Parser $ \_i0 a0 m0 _kf ks -> ks (I c) a0 m0 ()
 {-# INLINE put #-}
 
-ensure' :: Chunk t
+ensure' :: (MonoidNull t, Chunk t)
         => Int -> Input t -> Added t -> More -> Failure t r -> Success t t r
         -> IResult t r
 ensure' !n0 i0 a0 m0 kf0 ks0 =
 
 -- | If at least @n@ elements of input are available, return the
 -- current input, otherwise fail.
-ensure :: Chunk t => Int -> Parser t t
+ensure :: (MonoidNull t, Chunk t) => Int -> Parser t t
 ensure !n = Parser $ \i0 a0 m0 kf ks ->
     if chunkLengthAtLeast (unI i0) n
     then ks i0 a0 m0 (unI i0)
 
 -- | Ask for input.  If we receive any, pass it to a success
 -- continuation, otherwise to a failure continuation.
-prompt :: Chunk t
+prompt :: MonoidNull t
        => Input t -> Added t -> More
        -> (Input t -> Added t -> More -> IResult t r)
        -> (Input t -> Added t -> More -> IResult t r)
        -> IResult t r
 prompt i0 a0 _m0 kf ks = Partial $ \s ->
-    if nullChunk s
+    if null s
     then kf i0 a0 Complete
     else ks (i0 <> I s) (a0 <> A s) Incomplete
 {-# SPECIALIZE prompt :: Input ByteString -> Added ByteString -> More
 
 -- | Immediately demand more input via a 'Partial' continuation
 -- result.
-demandInput :: Chunk t => Parser t ()
+demandInput :: MonoidNull t => Parser t ()
 demandInput = Parser $ \i0 a0 m0 kf ks ->
     if m0 == Complete
     then kf i0 a0 m0 ["demandInput"] "not enough input"
 -- | This parser always succeeds.  It returns 'True' if any input is
 -- available either immediately or on demand, and 'False' if the end
 -- of all input has been reached.
-wantInput :: Chunk t => Parser t Bool
+wantInput :: MonoidNull t => Parser t Bool
 wantInput = Parser $ \i0 a0 m0 _kf ks ->
   case () of
-    _ | not (nullChunk (unI i0)) -> ks i0 a0 m0 True
+    _ | not (null (unI i0)) -> ks i0 a0 m0 True
       | m0 == Complete  -> ks i0 a0 m0 False
       | otherwise       -> let kf' i a m = ks i a m False
                                ks' i a m = ks i a m True

Data/Picoparsec/Monoid.hs

     , I.takeTill
 
     -- ** Efficient character string handling
+    , I.skipCharsWhile
     , I.takeCharsWhile
     , I.takeCharsWhile1
     , I.takeCharsTill

Data/Picoparsec/Monoid/Internal.hs

     , takeTill
 
     -- ** Efficient character string handling
+    , skipCharsWhile
     , takeCharsWhile
     , takeCharsWhile1
     , takeCharsTill
       when input go
 {-# INLINE skipWhile #-}
 
+-- | Skip past input characters for as long as the predicate returns 'True'.
+skipCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t ()
+skipCharsWhile p = go
+ where
+  go = do
+    t <- Textual.dropWhile (const False) p <$> get
+    put t
+    when (null t) $ do
+      input <- wantMoreInput
+      when input go
+{-# INLINE skipCharsWhile #-}
+
 -- | Consume input as long as the predicate returns 'False'
 -- (i.e. until it returns 'True'), and return the consumed input.
 --

Data/Picoparsec/Number.hs

-{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE BangPatterns, DeriveDataTypeable, FlexibleInstances, TypeFamilies,
+    TypeSynonymInstances, GADTs, OverloadedStrings #-}
 -- |
 -- Module      :  Data.Picoparsec.Number
 -- Copyright   :  Bryan O'Sullivan 2011, Mario Blažević <blamario@yahoo.com> 2014
 --
 -- A simple number type, useful for parsing both exact and inexact
 -- quantities without losing much precision.
-module Data.Picoparsec.Number ( Number(..) ) where
+module Data.Picoparsec.Number (
+    Number(..)
 
+    -- * Numeric parsers
+    , decimal
+    , hexadecimal
+    , signed
+    , double
+    , number
+    , rational
+    , scientific
+    ) where
+
+import Prelude hiding (length)
+
+import Control.Applicative (pure, (*>), (<$>), (<|>))
 import Control.DeepSeq (NFData(rnf))
+import Data.Monoid.Factorial (length)
+import Data.Monoid.Textual (TextualMonoid, foldl')
+import Data.Bits (Bits, (.|.), shiftL)
+import Data.Char (isDigit, ord)
 import Data.Data (Data)
 import Data.Function (on)
+import Data.Scientific (Scientific, coefficient, base10Exponent)
+import qualified Data.Scientific as Sci (scientific)
 import Data.Typeable (Typeable)
 
+import Data.Picoparsec.Monoid (Parser, string)
+import qualified Data.Picoparsec.Monoid.Internal as I
+
 -- | A numeric type that can represent integers accurately, and
 -- floating point numbers to the precision of a 'Double'.
 data Number = I !Integer
     floor (I a) = fromIntegral a
     floor (D a) = floor a
     {-# INLINE floor #-}
+
+-- | Parse and decode an unsigned hexadecimal number.  The hex digits
+-- @\'a\'@ through @\'f\'@ may be upper or lower case.
+--
+-- This parser does not accept a leading @\"0x\"@ string.
+hexadecimal :: (TextualMonoid t, Integral a, Bits a) => Parser t a
+hexadecimal = foldl' undefined step 0 <$> I.takeCharsWhile1 isHexDigit
+  where
+    isHexDigit c = (c >= '0' && c <= '9') ||
+                   (c >= 'a' && c <= 'f') ||
+                   (c >= 'A' && c <= 'F')
+    step a c | c >= '0' && c <= '9'  = (a `shiftL` 4) .|. fromIntegral (ord c - 48)
+             | c >= 'a'              = (a `shiftL` 4) .|. fromIntegral (ord c - 87)
+             | otherwise             = (a `shiftL` 4) .|. fromIntegral (ord c - 55)
+{-# INLINEABLE hexadecimal #-}
+
+-- | Parse and decode an unsigned decimal number.
+decimal :: (TextualMonoid t, Integral a) => Parser t a
+decimal = foldl' undefined step 0 <$> I.takeCharsWhile1 isDig
+  where isDig c  = c >= '0' && c <= '9'
+        step a c = a * 10 + fromIntegral (ord c - ord '0')
+{-# INLINEABLE decimal #-}
+
+-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
+-- character.
+signed :: (TextualMonoid t, Num a) => Parser t a -> Parser t a
+{-# INLINEABLE signed #-}
+signed p = (negate <$> (string "-" *> p))
+       <|> (string "+" *> p)
+       <|> p
+
+-- | Parse a rational number.
+--
+-- This parser accepts an optional leading sign character, followed by
+-- at least one decimal digit.  The syntax similar to that accepted by
+-- the 'read' function, with the exception that a trailing @\'.\'@ or
+-- @\'e\'@ /not/ followed by a number is not consumed.
+--
+-- Examples with behaviour identical to 'read', if you feed an empty
+-- continuation to the first result:
+--
+-- >rational "3"     == Done 3.0 ""
+-- >rational "3.1"   == Done 3.1 ""
+-- >rational "3e4"   == Done 30000.0 ""
+-- >rational "3.1e4" == Done 31000.0, ""
+--
+-- Examples with behaviour identical to 'read':
+--
+-- >rational ".3"    == Fail "input does not start with a digit"
+-- >rational "e3"    == Fail "input does not start with a digit"
+--
+-- Examples of differences from 'read':
+--
+-- >rational "3.foo" == Done 3.0 ".foo"
+-- >rational "3e"    == Done 3.0 "e"
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+rational :: (TextualMonoid t, Fractional a) => Parser t a
+{-# INLINEABLE rational #-}
+rational = scientifically realToFrac
+
+-- | Parse a rational number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational',
+-- but is slightly less accurate.
+--
+-- The 'Double' type supports about 16 decimal places of accuracy.
+-- For 94.2% of numbers, this function and 'rational' give identical
+-- results, but for the remaining 5.8%, this function loses precision
+-- around the 15th decimal place.  For 0.001% of numbers, this
+-- function will lose precision at the 13th or 14th decimal place.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+double :: TextualMonoid t => Parser t Double
+double = rational
+
+-- | Parse a number, attempting to preserve both speed and precision.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational'.
+-- On integral inputs, it gives perfectly accurate answers, and on
+-- floating point inputs, it is slightly less accurate than
+-- 'rational'.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"
+number :: TextualMonoid t => Parser t Number
+number = scientifically $ \s ->
+            let e = base10Exponent s
+                c = coefficient s
+            in if e >= 0
+               then I (c * 10 ^ e)
+               else D (fromInteger c / 10 ^ negate e)
+
+-- | Parse a scientific number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+scientific :: TextualMonoid t => Parser t Scientific
+scientific = scientifically id
+
+{-# INLINE scientifically #-}
+scientifically :: TextualMonoid t => (Scientific -> a) -> Parser t a
+scientifically h = do
+  !positive <- ((== '+') <$> I.satisfyChar (\c -> c == '-' || c == '+')) <|>
+               pure True
+
+  n <- decimal
+
+  let f fracDigits = Sci.scientific (foldl' undefined step n fracDigits)
+                                    (negate $ length fracDigits)
+      step a c = a * 10 + fromIntegral (ord c - ord '0')
+
+  s <- (string "." *> (f <$> I.takeCharsWhile isDigit)) <|>
+         pure (Sci.scientific n 0)
+
+  let !signedCoeff | positive  =          coefficient s
+                   | otherwise = negate $ coefficient s
+
+  (I.satisfyChar (\c -> c == 'e' || c == 'E') *>
+      fmap (h . Sci.scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
+    return (h $ Sci.scientific signedCoeff   (base10Exponent s))

benchmarks/Aeson.hs

-{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
-{-# OPTIONS_GHC -fno-warn-unused-binds #-}
-
-module Aeson
-    (
-      aeson
-    , value'
-    ) where
-
-import Data.ByteString.Builder
-  (Builder, byteString, toLazyByteString, charUtf8, word8)
-
-import Control.Applicative ((*>), (<$>), (<*), liftA2, pure)
-import Control.DeepSeq (NFData(..))
-import Control.Monad (forM)
-import Data.Attoparsec.ByteString.Char8 (Parser, char, endOfInput, scientific,
-                                         skipSpace, string)
-import Data.Bits ((.|.), shiftL)
-import Data.ByteString (ByteString)
-import Data.Char (chr)
-import Data.List (sort)
-import Data.Monoid (mappend, mempty)
-import Data.Scientific (Scientific)
-import Data.Text (Text)
-import Data.Text.Encoding (decodeUtf8')
-import Data.Vector as Vector (Vector, foldl', fromList)
-import Data.Word (Word8)
-import System.Directory (getDirectoryContents)
-import System.FilePath ((</>), dropExtension)
-import qualified Data.Attoparsec.ByteString as A
-import qualified Data.Attoparsec.Lazy as L
-import qualified Data.Attoparsec.Zepto as Z
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString.Unsafe as B
-import qualified Data.HashMap.Strict as H
-import Criterion.Main
-
-#define BACKSLASH 92
-#define CLOSE_CURLY 125
-#define CLOSE_SQUARE 93
-#define COMMA 44
-#define DOUBLE_QUOTE 34
-#define OPEN_CURLY 123
-#define OPEN_SQUARE 91
-#define C_0 48
-#define C_9 57
-#define C_A 65
-#define C_F 70
-#define C_a 97
-#define C_f 102
-#define C_n 110
-#define C_t 116
-
-data Result a = Error String
-              | Success a
-                deriving (Eq, Show)
-
-
--- | A JSON \"object\" (key\/value map).
-type Object = H.HashMap Text Value
-
--- | A JSON \"array\" (sequence).
-type Array = Vector Value
-
--- | A JSON value represented as a Haskell value.
-data Value = Object !Object
-           | Array !Array
-           | String !Text
-           | Number !Scientific
-           | Bool !Bool
-           | Null
-             deriving (Eq, Show)
-
-instance NFData Value where
-    rnf (Object o) = rnf o
-    rnf (Array a)  = Vector.foldl' (\x y -> rnf y `seq` x) () a
-    rnf (String s) = rnf s
-    rnf (Number n) = rnf n
-    rnf (Bool b)   = rnf b
-    rnf Null       = ()
-
--- | Parse a top-level JSON value.  This must be either an object or
--- an array, per RFC 4627.
---
--- The conversion of a parsed value to a Haskell value is deferred
--- until the Haskell value is needed.  This may improve performance if
--- only a subset of the results of conversions are needed, but at a
--- cost in thunk allocation.
-json :: Parser Value
-json = json_ object_ array_
-
--- | Parse a top-level JSON value.  This must be either an object or
--- an array, per RFC 4627.
---
--- This is a strict version of 'json' which avoids building up thunks
--- during parsing; it performs all conversions immediately.  Prefer
--- this version if most of the JSON data needs to be accessed.
-json' :: Parser Value
-json' = json_ object_' array_'
-
-json_ :: Parser Value -> Parser Value -> Parser Value
-json_ obj ary = do
-  w <- skipSpace *> A.satisfy (\w -> w == OPEN_CURLY || w == OPEN_SQUARE)
-  if w == OPEN_CURLY
-    then obj
-    else ary
-{-# INLINE json_ #-}
-
-object_ :: Parser Value
-object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value
-
-object_' :: Parser Value
-object_' = {-# SCC "object_'" #-} do
-  !vals <- objectValues jstring' value'
-  return (Object vals)
- where
-  jstring' = do
-    !s <- jstring
-    return s
-
-objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value)
-objectValues str val = do
-  skipSpace
-  let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val)
-  H.fromList <$> commaSeparated pair CLOSE_CURLY
-{-# INLINE objectValues #-}
-
-array_ :: Parser Value
-array_ = {-# SCC "array_" #-} Array <$> arrayValues value
-
-array_' :: Parser Value
-array_' = {-# SCC "array_'" #-} do
-  !vals <- arrayValues value'
-  return (Array vals)
-
-commaSeparated :: Parser a -> Word8 -> Parser [a]
-commaSeparated item endByte = do
-  w <- A.peekWord8'
-  if w == endByte
-    then A.anyWord8 >> return []
-    else loop
-  where
-    loop = do
-      v <- item <* skipSpace
-      ch <- A.satisfy $ \w -> w == COMMA || w == endByte
-      if ch == COMMA
-        then skipSpace >> (v:) <$> loop
-        else return [v]
-{-# INLINE commaSeparated #-}
-
-arrayValues :: Parser Value -> Parser (Vector Value)
-arrayValues val = do
-  skipSpace
-  Vector.fromList <$> commaSeparated val CLOSE_SQUARE
-{-# INLINE arrayValues #-}
-
--- | Parse any JSON value.  You should usually 'json' in preference to
--- this function, as this function relaxes the object-or-array
--- requirement of RFC 4627.
---
--- In particular, be careful in using this function if you think your
--- code might interoperate with Javascript.  A na&#xef;ve Javascript
--- library that parses JSON data using @eval@ is vulnerable to attack
--- unless the encoded data represents an object or an array.  JSON
--- implementations in other languages conform to that same restriction
--- to preserve interoperability and security.
-value :: Parser Value
-value = do
-  w <- A.peekWord8'
-  case w of
-    DOUBLE_QUOTE  -> A.anyWord8 *> (String <$> jstring_)
-    OPEN_CURLY    -> A.anyWord8 *> object_
-    OPEN_SQUARE   -> A.anyWord8 *> array_
-    C_f           -> string "false" *> pure (Bool False)
-    C_t           -> string "true" *> pure (Bool True)
-    C_n           -> string "null" *> pure Null
-    _              | w >= 48 && w <= 57 || w == 45
-                  -> Number <$> scientific
-      | otherwise -> fail "not a valid json value"
-
--- | Strict version of 'value'. See also 'json''.
-value' :: Parser Value
-value' = do
-  w <- A.peekWord8'
-  case w of
-    DOUBLE_QUOTE  -> do
-                     !s <- A.anyWord8 *> jstring_
-                     return (String s)
-    OPEN_CURLY    -> A.anyWord8 *> object_'
-    OPEN_SQUARE   -> A.anyWord8 *> array_'
-    C_f           -> string "false" *> pure (Bool False)
-    C_t           -> string "true" *> pure (Bool True)
-    C_n           -> string "null" *> pure Null
-    _              | w >= 48 && w <= 57 || w == 45
-                  -> do
-                     !n <- scientific
-                     return (Number n)
-      | otherwise -> fail "not a valid json value"
-
--- | Parse a quoted JSON string.
-jstring :: Parser Text
-jstring = A.word8 DOUBLE_QUOTE *> jstring_
-
--- | Parse a string without a leading quote.
-jstring_ :: Parser Text
-jstring_ = {-# SCC "jstring_" #-} do
-  s <- A.scan False $ \s c -> if s then Just False
-                                   else if c == DOUBLE_QUOTE
-                                        then Nothing
-                                        else Just (c == BACKSLASH)
-  _ <- A.word8 DOUBLE_QUOTE
-  s1 <- if BACKSLASH `B.elem` s
-        then case Z.parse unescape s of
-            Right r  -> return r
-            Left err -> fail err
-         else return s
-
-  case decodeUtf8' s1 of
-      Right r  -> return r
-      Left err -> fail $ show err
-
-{-# INLINE jstring_ #-}
-
-unescape :: Z.Parser ByteString
-unescape = toByteString <$> go mempty where
-  go acc = do
-    h <- Z.takeWhile (/=BACKSLASH)
-    let rest = do
-          start <- Z.take 2
-          let !slash = B.unsafeHead start
-              !t = B.unsafeIndex start 1
-              escape = case B.findIndex (==t) "\"\\/ntbrfu" of
-                         Just i -> i
-                         _      -> 255
-          if slash /= BACKSLASH || escape == 255
-            then fail "invalid JSON escape sequence"
-            else do
-            let cont m = go (acc `mappend` byteString h `mappend` m)
-                {-# INLINE cont #-}
-            if t /= 117 -- 'u'
-              then cont (word8 (B.unsafeIndex mapping escape))
-              else do
-                   a <- hexQuad
-                   if a < 0xd800 || a > 0xdfff
-                     then cont (charUtf8 (chr a))
-                     else do
-                       b <- Z.string "\\u" *> hexQuad
-                       if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff
-                         then let !c = ((a - 0xd800) `shiftL` 10) +
-                                       (b - 0xdc00) + 0x10000
-                              in cont (charUtf8 (chr c))
-                         else fail "invalid UTF-16 surrogates"
-    done <- Z.atEnd
-    if done
-      then return (acc `mappend` byteString h)
-      else rest
-  mapping = "\"\\/\n\t\b\r\f"
-
-hexQuad :: Z.Parser Int
-hexQuad = do
-  s <- Z.take 4
-  let hex n | w >= C_0 && w <= C_9 = w - C_0
-            | w >= C_a && w <= C_f = w - 87
-            | w >= C_A && w <= C_F = w - 55
-            | otherwise          = 255
-        where w = fromIntegral $ B.unsafeIndex s n
-      a = hex 0; b = hex 1; c = hex 2; d = hex 3
-  if (a .|. b .|. c .|. d) /= 255
-    then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12)
-    else fail "invalid hex escape"
-
-decodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString -> Maybe a
-decodeWith p to s =
-    case L.parse p s of
-      L.Done _ v -> case to v of
-                      Success a -> Just a
-                      _         -> Nothing
-      _          -> Nothing
-{-# INLINE decodeWith #-}
-
-decodeStrictWith :: Parser Value -> (Value -> Result a) -> B.ByteString
-                 -> Maybe a
-decodeStrictWith p to s =
-    case either Error to (A.parseOnly p s) of
-      Success a -> Just a
-      Error _ -> Nothing
-{-# INLINE decodeStrictWith #-}
-
-eitherDecodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString
-                 -> Either String a
-eitherDecodeWith p to s =
-    case L.parse p s of
-      L.Done _ v -> case to v of
-                      Success a -> Right a
-                      Error msg -> Left msg
-      L.Fail _ _ msg -> Left msg
-{-# INLINE eitherDecodeWith #-}
-
-eitherDecodeStrictWith :: Parser Value -> (Value -> Result a) -> B.ByteString
-                       -> Either String a
-eitherDecodeStrictWith p to s =
-    case either Error to (A.parseOnly p s) of
-      Success a -> Right a
-      Error msg -> Left msg
-{-# INLINE eitherDecodeStrictWith #-}
-
--- $lazy
---
--- The 'json' and 'value' parsers decouple identification from
--- conversion.  Identification occurs immediately (so that an invalid
--- JSON document can be rejected as early as possible), but conversion
--- to a Haskell value is deferred until that value is needed.
---
--- This decoupling can be time-efficient if only a smallish subset of
--- elements in a JSON value need to be inspected, since the cost of
--- conversion is zero for uninspected elements.  The trade off is an
--- increase in memory usage, due to allocation of thunks for values
--- that have not yet been converted.
-
--- $strict
---
--- The 'json'' and 'value'' parsers combine identification with
--- conversion.  They consume more CPU cycles up front, but have a
--- smaller memory footprint.
-
--- | Parse a top-level JSON value followed by optional whitespace and
--- end-of-input.  See also: 'json'.
-jsonEOF :: Parser Value
-jsonEOF = json <* skipSpace <* endOfInput
-
--- | Parse a top-level JSON value followed by optional whitespace and
--- end-of-input.  See also: 'json''.
-jsonEOF' :: Parser Value
-jsonEOF' = json' <* skipSpace <* endOfInput
-
-toByteString :: Builder -> ByteString
-toByteString = L.toStrict . toLazyByteString
-{-# INLINE toByteString #-}
-
-aeson :: IO Benchmark
-aeson = do
-  let path = "json-data"
-  names <- sort . filter (`notElem` [".", ".."]) <$> getDirectoryContents path
-  benches <- forM names $ \name -> do
-    bs <- B.readFile (path </> name)
-    return . bench (dropExtension name) $ nf (A.parseOnly jsonEOF') bs
-  return $ bgroup "aeson" benches

benchmarks/AttoAeson.hs

+{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
+{-# OPTIONS_GHC -fno-warn-unused-binds #-}
+
+module AttoAeson
+    (
+      aeson
+    , value'
+    ) where
+
+import Data.ByteString.Builder
+  (Builder, byteString, toLazyByteString, charUtf8, word8)
+
+import Control.Applicative ((*>), (<$>), (<*), liftA2, pure)
+import Control.DeepSeq (NFData(..))
+import Control.Monad (forM)
+import Data.Attoparsec.ByteString.Char8 (Parser, char, endOfInput, scientific,
+                                         skipSpace, string)
+import Data.Bits ((.|.), shiftL)
+import Data.ByteString (ByteString)
+import Data.Char (chr)
+import Data.List (sort)
+import Data.Monoid (mappend, mempty)
+import Data.Scientific (Scientific)
+import Data.Text (Text)
+import Data.Text.Encoding (decodeUtf8')
+import Data.Vector as Vector (Vector, foldl', fromList)
+import Data.Word (Word8)
+import System.Directory (getDirectoryContents)
+import System.FilePath ((</>), dropExtension)
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.Lazy as L
+import qualified Data.Attoparsec.Zepto as Z
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString.Unsafe as B
+import qualified Data.HashMap.Strict as H
+import Criterion.Main
+
+#define BACKSLASH 92
+#define CLOSE_CURLY 125
+#define CLOSE_SQUARE 93
+#define COMMA 44
+#define DOUBLE_QUOTE 34
+#define OPEN_CURLY 123
+#define OPEN_SQUARE 91
+#define C_0 48
+#define C_9 57
+#define C_A 65
+#define C_F 70
+#define C_a 97
+#define C_f 102
+#define C_n 110
+#define C_t 116
+
+data Result a = Error String
+              | Success a
+                deriving (Eq, Show)
+
+
+-- | A JSON \"object\" (key\/value map).
+type Object = H.HashMap Text Value
+
+-- | A JSON \"array\" (sequence).
+type Array = Vector Value
+
+-- | A JSON value represented as a Haskell value.
+data Value = Object !Object
+           | Array !Array
+           | String !Text
+           | Number !Scientific
+           | Bool !Bool
+           | Null
+             deriving (Eq, Show)
+
+instance NFData Value where
+    rnf (Object o) = rnf o
+    rnf (Array a)  = Vector.foldl' (\x y -> rnf y `seq` x) () a
+    rnf (String s) = rnf s
+    rnf (Number n) = rnf n
+    rnf (Bool b)   = rnf b
+    rnf Null       = ()
+
+-- | Parse a top-level JSON value.  This must be either an object or
+-- an array, per RFC 4627.
+--
+-- The conversion of a parsed value to a Haskell value is deferred
+-- until the Haskell value is needed.  This may improve performance if
+-- only a subset of the results of conversions are needed, but at a
+-- cost in thunk allocation.
+json :: Parser Value
+json = json_ object_ array_
+
+-- | Parse a top-level JSON value.  This must be either an object or
+-- an array, per RFC 4627.
+--
+-- This is a strict version of 'json' which avoids building up thunks
+-- during parsing; it performs all conversions immediately.  Prefer
+-- this version if most of the JSON data needs to be accessed.
+json' :: Parser Value
+json' = json_ object_' array_'
+
+json_ :: Parser Value -> Parser Value -> Parser Value
+json_ obj ary = do
+  w <- skipSpace *> A.satisfy (\w -> w == OPEN_CURLY || w == OPEN_SQUARE)
+  if w == OPEN_CURLY
+    then obj
+    else ary
+{-# INLINE json_ #-}
+
+object_ :: Parser Value
+object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value
+
+object_' :: Parser Value
+object_' = {-# SCC "object_'" #-} do
+  !vals <- objectValues jstring' value'
+  return (Object vals)
+ where
+  jstring' = do
+    !s <- jstring
+    return s
+
+objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value)
+objectValues str val = do
+  skipSpace
+  let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val)
+  H.fromList <$> commaSeparated pair CLOSE_CURLY
+{-# INLINE objectValues #-}
+
+array_ :: Parser Value
+array_ = {-# SCC "array_" #-} Array <$> arrayValues value
+
+array_' :: Parser Value
+array_' = {-# SCC "array_'" #-} do
+  !vals <- arrayValues value'
+  return (Array vals)
+
+commaSeparated :: Parser a -> Word8 -> Parser [a]
+commaSeparated item endByte = do
+  w <- A.peekWord8'
+  if w == endByte
+    then A.anyWord8 >> return []
+    else loop
+  where
+    loop = do
+      v <- item <* skipSpace
+      ch <- A.satisfy $ \w -> w == COMMA || w == endByte
+      if ch == COMMA
+        then skipSpace >> (v:) <$> loop
+        else return [v]
+{-# INLINE commaSeparated #-}
+
+arrayValues :: Parser Value -> Parser (Vector Value)
+arrayValues val = do
+  skipSpace
+  Vector.fromList <$> commaSeparated val CLOSE_SQUARE
+{-# INLINE arrayValues #-}
+
+-- | Parse any JSON value.  You should usually 'json' in preference to
+-- this function, as this function relaxes the object-or-array
+-- requirement of RFC 4627.
+--
+-- In particular, be careful in using this function if you think your
+-- code might interoperate with Javascript.  A na&#xef;ve Javascript
+-- library that parses JSON data using @eval@ is vulnerable to attack
+-- unless the encoded data represents an object or an array.  JSON
+-- implementations in other languages conform to that same restriction
+-- to preserve interoperability and security.
+value :: Parser Value
+value = do
+  w <- A.peekWord8'
+  case w of
+    DOUBLE_QUOTE  -> A.anyWord8 *> (String <$> jstring_)
+    OPEN_CURLY    -> A.anyWord8 *> object_
+    OPEN_SQUARE   -> A.anyWord8 *> array_
+    C_f           -> string "false" *> pure (Bool False)
+    C_t           -> string "true" *> pure (Bool True)
+    C_n           -> string "null" *> pure Null
+    _              | w >= 48 && w <= 57 || w == 45
+                  -> Number <$> scientific
+      | otherwise -> fail "not a valid json value"
+
+-- | Strict version of 'value'. See also 'json''.
+value' :: Parser Value
+value' = do
+  w <- A.peekWord8'
+  case w of
+    DOUBLE_QUOTE  -> do
+                     !s <- A.anyWord8 *> jstring_
+                     return (String s)
+    OPEN_CURLY    -> A.anyWord8 *> object_'
+    OPEN_SQUARE   -> A.anyWord8 *> array_'
+    C_f           -> string "false" *> pure (Bool False)
+    C_t           -> string "true" *> pure (Bool True)
+    C_n           -> string "null" *> pure Null
+    _              | w >= 48 && w <= 57 || w == 45
+                  -> do
+                     !n <- scientific
+                     return (Number n)
+      | otherwise -> fail "not a valid json value"
+
+-- | Parse a quoted JSON string.
+jstring :: Parser Text
+jstring = A.word8 DOUBLE_QUOTE *> jstring_
+
+-- | Parse a string without a leading quote.
+jstring_ :: Parser Text
+jstring_ = {-# SCC "jstring_" #-} do
+  s <- A.scan False $ \s c -> if s then Just False
+                                   else if c == DOUBLE_QUOTE
+                                        then Nothing
+                                        else Just (c == BACKSLASH)
+  _ <- A.word8 DOUBLE_QUOTE
+  s1 <- if BACKSLASH `B.elem` s
+        then case Z.parse unescape s of
+            Right r  -> return r
+            Left err -> fail err
+         else return s
+
+  case decodeUtf8' s1 of
+      Right r  -> return r
+      Left err -> fail $ show err
+
+{-# INLINE jstring_ #-}
+
+unescape :: Z.Parser ByteString
+unescape = toByteString <$> go mempty where
+  go acc = do
+    h <- Z.takeWhile (/=BACKSLASH)
+    let rest = do
+          start <- Z.take 2
+          let !slash = B.unsafeHead start
+              !t = B.unsafeIndex start 1
+              escape = case B.findIndex (==t) "\"\\/ntbrfu" of
+                         Just i -> i
+                         _      -> 255
+          if slash /= BACKSLASH || escape == 255
+            then fail "invalid JSON escape sequence"
+            else do
+            let cont m = go (acc `mappend` byteString h `mappend` m)
+                {-# INLINE cont #-}
+            if t /= 117 -- 'u'
+              then cont (word8 (B.unsafeIndex mapping escape))
+              else do
+                   a <- hexQuad
+                   if a < 0xd800 || a > 0xdfff
+                     then cont (charUtf8 (chr a))
+                     else do
+                       b <- Z.string "\\u" *> hexQuad
+                       if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff
+                         then let !c = ((a - 0xd800) `shiftL` 10) +
+                                       (b - 0xdc00) + 0x10000
+                              in cont (charUtf8 (chr c))
+                         else fail "invalid UTF-16 surrogates"
+    done <- Z.atEnd
+    if done
+      then return (acc `mappend` byteString h)
+      else rest
+  mapping = "\"\\/\n\t\b\r\f"
+
+hexQuad :: Z.Parser Int
+hexQuad = do
+  s <- Z.take 4
+  let hex n | w >= C_0 && w <= C_9 = w - C_0
+            | w >= C_a && w <= C_f = w - 87
+            | w >= C_A && w <= C_F = w - 55
+            | otherwise          = 255
+        where w = fromIntegral $ B.unsafeIndex s n
+      a = hex 0; b = hex 1; c = hex 2; d = hex 3
+  if (a .|. b .|. c .|. d) /= 255
+    then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12)
+    else fail "invalid hex escape"
+
+decodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString -> Maybe a
+decodeWith p to s =
+    case L.parse p s of
+      L.Done _ v -> case to v of
+                      Success a -> Just a
+                      _         -> Nothing
+      _          -> Nothing
+{-# INLINE decodeWith #-}
+
+decodeStrictWith :: Parser Value -> (Value -> Result a) -> B.ByteString
+                 -> Maybe a
+decodeStrictWith p to s =
+    case either Error to (A.parseOnly p s) of
+      Success a -> Just a
+      Error _ -> Nothing
+{-# INLINE decodeStrictWith #-}
+
+eitherDecodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString
+                 -> Either String a
+eitherDecodeWith p to s =
+    case L.parse p s of
+      L.Done _ v -> case to v of
+                      Success a -> Right a
+                      Error msg -> Left msg
+      L.Fail _ _ msg -> Left msg
+{-# INLINE eitherDecodeWith #-}
+
+eitherDecodeStrictWith :: Parser Value -> (Value -> Result a) -> B.ByteString
+                       -> Either String a
+eitherDecodeStrictWith p to s =
+    case either Error to (A.parseOnly p s) of
+      Success a -> Right a
+      Error msg -> Left msg
+{-# INLINE eitherDecodeStrictWith #-}
+
+-- $lazy
+--
+-- The 'json' and 'value' parsers decouple identification from
+-- conversion.  Identification occurs immediately (so that an invalid
+-- JSON document can be rejected as early as possible), but conversion
+-- to a Haskell value is deferred until that value is needed.
+--
+-- This decoupling can be time-efficient if only a smallish subset of
+-- elements in a JSON value need to be inspected, since the cost of
+-- conversion is zero for uninspected elements.  The trade off is an
+-- increase in memory usage, due to allocation of thunks for values
+-- that have not yet been converted.
+
+-- $strict
+--
+-- The 'json'' and 'value'' parsers combine identification with
+-- conversion.  They consume more CPU cycles up front, but have a
+-- smaller memory footprint.
+
+-- | Parse a top-level JSON value followed by optional whitespace and
+-- end-of-input.  See also: 'json'.
+jsonEOF :: Parser Value
+jsonEOF = json <* skipSpace <* endOfInput
+
+-- | Parse a top-level JSON value followed by optional whitespace and
+-- end-of-input.  See also: 'json''.
+jsonEOF' :: Parser Value
+jsonEOF' = json' <* skipSpace <* endOfInput
+
+toByteString :: Builder -> ByteString
+toByteString = L.toStrict . toLazyByteString
+{-# INLINE toByteString #-}
+
+aeson :: IO Benchmark
+aeson = do
+  let path = "json-data"
+  names <- sort . filter (`notElem` [".", ".."]) <$> getDirectoryContents path
+  benches <- forM names $ \name -> do
+    bs <- B.readFile (path </> name)
+    return . bench (dropExtension name) $ nf (A.parseOnly jsonEOF') bs
+  return $ bgroup "aeson" benches

benchmarks/Benchmarks.hs

 import Numbers (numbers)
 import Text.Parsec.Text ()
 import Text.Parsec.Text.Lazy ()
-import qualified Aeson
+import qualified AttoAeson
+import qualified PicoAeson
 import qualified Data.Attoparsec.ByteString as AB
 import qualified Data.Attoparsec.ByteString.Char8 as AC
 import qualified Data.Attoparsec.ByteString.Lazy as ABL
       !t = T.pack s
       !tl = TL.fromChunks . map T.pack . chunksOf 4 $ s
       !utf8b = UTF8.ByteStringUTF8 b
-  aeson <- Aeson.aeson
+  aesonA <- AttoAeson.aeson
+  aesonP <- PicoAeson.aeson
   headersBS <- HeadersByteString.headers
   headersT <- HeadersText.headers
   defaultMain [
        bench "short" $ nf (AB.parse quotedString) (BC.pack "abcdefghijk\"")
      , bench "long" $ nf (AB.parse quotedString) b
      ]
-   , aeson
+   , aesonA
+   , aesonP
    , headersBS
    , headersT
    , Links.links

benchmarks/Numbers.hs

 
 module Numbers (numbers) where
 
-import Control.DeepSeq (NFData)
 import Criterion.Main (bench, bgroup, nf)
 import Criterion.Types (Benchmark)
 import Data.Scientific (Scientific(..))
 import qualified Data.Attoparsec.ByteString.Char8 as AC
 import qualified Data.Attoparsec.Text as AT
 import qualified Data.Picoparsec as P
+import qualified Data.Picoparsec.Number as P
 import qualified Data.ByteString.Char8 as BC
-import qualified Data.ByteString.Lazy.Char8 as BLC
 import qualified Data.Text as T
-import qualified Data.Monoid.Instances.ByteString.UTF8 as UTF8
-
-instance NFData UTF8.ByteStringUTF8
-
+import Data.Monoid.Instances.ByteString.UTF8 (ByteStringUTF8(..))
 
 strN, strNePos, strNeNeg :: String
 strN     = "1234.56789"
       !tNePos = T.pack strNePos
       !tNeNeg = T.pack strNeNeg
   in bgroup "Text"
-  [
-    bgroup "no power"
-    [ bench "double" $ nf (AT.parseOnly AT.double) tN
-    , bench "number" $ nf (AT.parseOnly AT.number) tN
-    , bench "rational" $
-      nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tN
-    , bench "scientific" $
-      nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tN
+  [ bgroup "attoparsec"
+    [
+      bgroup "no power"
+      [ bench "double" $ nf (AT.parseOnly AT.double) tN
+      , bench "number" $ nf (AT.parseOnly AT.number) tN
+      , bench "rational" $
+        nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tN
+      , bench "scientific" $
+        nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tN
+      ]
+    , bgroup "positive power"
+      [ bench "double" $ nf (AT.parseOnly AT.double) tNePos
+      , bench "number" $ nf (AT.parseOnly AT.number) tNePos
+      , bench "rational" $
+        nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tNePos
+      , bench "scientific" $
+        nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNePos
+      ]
+    , bgroup "negative power"
+      [ bench "double" $ nf (AT.parseOnly AT.double) tNeNeg
+      , bench "number" $ nf (AT.parseOnly AT.number) tNeNeg
+      , bench "rational" $
+        nf (AT.parseOnly (AT.rational :: AT.Parser Rational))  tNeNeg
+      , bench "scientific" $
+        nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNeNeg
+      ]
     ]
-  , bgroup "positive power"
-    [ bench "double" $ nf (AT.parseOnly AT.double) tNePos
-    , bench "number" $ nf (AT.parseOnly AT.number) tNePos
-    , bench "rational" $
-      nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tNePos
-    , bench "scientific" $
-      nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNePos
-    ]
-  , bgroup "negative power"
-    [ bench "double" $ nf (AT.parseOnly AT.double) tNeNeg
-    , bench "number" $ nf (AT.parseOnly AT.number) tNeNeg
-    , bench "rational" $
-      nf (AT.parseOnly (AT.rational :: AT.Parser Rational))  tNeNeg
-    , bench "scientific" $
-      nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNeNeg
+  , bgroup "picoparsec"
+    [
+      bgroup "no power"
+      [ bench "double" $ nf (P.parseOnly P.double) tN
+      , bench "number" $ nf (P.parseOnly P.number) tN
+      , bench "rational" $
+        nf (P.parseOnly (P.rational :: P.Parser T.Text Rational)) tN
+      , bench "scientific" $
+        nf (P.parseOnly (P.rational :: P.Parser T.Text Scientific)) tN
+      ]
+    , bgroup "positive power"
+      [ bench "double" $ nf (P.parseOnly P.double) tNePos
+      , bench "number" $ nf (P.parseOnly P.number) tNePos
+      , bench "rational" $
+        nf (P.parseOnly (P.rational :: P.Parser T.Text Rational)) tNePos
+      , bench "scientific" $
+        nf (P.parseOnly (P.rational :: P.Parser T.Text Scientific)) tNePos
+      ]
+    , bgroup "negative power"
+      [ bench "double" $ nf (P.parseOnly P.double) tNeNeg
+      , bench "number" $ nf (P.parseOnly P.number) tNeNeg
+      , bench "rational" $
+        nf (P.parseOnly (P.rational :: P.Parser T.Text Rational))  tNeNeg
+      , bench "scientific" $
+        nf (P.parseOnly (P.rational :: P.Parser T.Text Scientific)) tNeNeg
+      ]
     ]
   ]
   , let !bN     = BC.pack strN
         !bNePos = BC.pack strNePos
         !bNeNeg = BC.pack strNeNeg
+        buN     = ByteStringUTF8 bN
+        buNePos = ByteStringUTF8 bNePos
+        buNeNeg = ByteStringUTF8 bNeNeg
   in bgroup "ByteString"
-  [ bgroup "no power"
-    [ bench "double" $ nf (AC.parseOnly AC.double) bN
-    , bench "number" $ nf (AC.parseOnly AC.number) bN
-    , bench "rational" $
-      nf (AC.parseOnly (AC.rational :: AC.Parser Rational))   bN
-    , bench "scientific" $
-      nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bN
+  [ bgroup "attoparsec"
+    [ bgroup "no power"
+      [ bench "double" $ nf (AC.parseOnly AC.double) bN
+      , bench "number" $ nf (AC.parseOnly AC.number) bN
+      , bench "rational" $
+        nf (AC.parseOnly (AC.rational :: AC.Parser Rational))   bN
+      , bench "scientific" $
+        nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bN
+      ]
+    , bgroup "positive power"
+      [ bench "double" $ nf (AC.parseOnly AC.double) bNePos
+      , bench "number" $ nf (AC.parseOnly AC.number) bNePos
+      , bench "rational" $
+        nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNePos
+      , bench "scientific" $
+        nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNePos
+      ]
+    , bgroup "negative power"
+      [ bench "double" $ nf (AC.parseOnly AC.double) bNeNeg
+      , bench "number" $ nf (AC.parseOnly AC.number) bNeNeg
+      , bench "rational" $
+        nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNeNeg
+      , bench "scientific" $
+        nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNeNeg
+      ]
     ]
-  , bgroup "positive power"
-    [ bench "double" $ nf (AC.parseOnly AC.double) bNePos
-    , bench "number" $ nf (AC.parseOnly AC.number) bNePos
-    , bench "rational" $
-      nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNePos
-    , bench "scientific" $
-      nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNePos
-    ]
-  , bgroup "negative power"
-    [ bench "double" $ nf (AC.parseOnly AC.double) bNeNeg
-    , bench "number" $ nf (AC.parseOnly AC.number) bNeNeg
-    , bench "rational" $
-      nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNeNeg
-    , bench "scientific" $
-      nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNeNeg
+  , bgroup "picoparsec"
+    [ bgroup "no power"
+      [ bench "double" $ nf (P.parseOnly P.double) buN
+      , bench "number" $ nf (P.parseOnly P.number) buN
+      , bench "rational" $
+        nf (P.parseOnly (P.rational :: P.Parser ByteStringUTF8 Rational))   buN
+      , bench "scientific" $
+        nf (P.parseOnly (P.rational :: P.Parser ByteStringUTF8 Scientific)) buN
+      ]
+    , bgroup "positive power"
+      [ bench "double" $ nf (P.parseOnly P.double) buNePos
+      , bench "number" $ nf (P.parseOnly P.number) buNePos
+      , bench "rational" $
+        nf (P.parseOnly (P.rational :: P.Parser ByteStringUTF8 Rational)) buNePos
+      , bench "scientific" $
+        nf (P.parseOnly (P.rational :: P.Parser ByteStringUTF8 Scientific)) buNePos
+      ]
+    , bgroup "negative power"
+      [ bench "double" $ nf (P.parseOnly P.double) buNeNeg
+      , bench "number" $ nf (P.parseOnly P.number) buNeNeg
+      , bench "rational" $
+        nf (P.parseOnly (P.rational :: P.Parser ByteStringUTF8 Rational)) buNeNeg
+      , bench "scientific" $
+        nf (P.parseOnly (P.rational :: P.Parser ByteStringUTF8 Scientific)) buNeNeg
+      ]
     ]
   ]
  ]

benchmarks/PicoAeson.hs

+{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
+{-# OPTIONS_GHC -fno-warn-unused-binds #-}
+
+module PicoAeson
+    (
+      aeson
+    , value'
+    ) where
+
+import Control.Applicative ((*>), (<$>), (<*), (<|>), liftA2, many, pure)
+import Control.DeepSeq (NFData(..))
+import Control.Monad (forM)
+import Data.Picoparsec (Parser, char, endOfInput, string)
+import Data.Picoparsec.Number (scientific)
+import Data.Bits ((.|.), shiftL)
+import Data.Char (chr, isSpace, ord)
+import Data.Hashable (Hashable(..))
+import Data.List (sort)
+import Data.Monoid (mconcat)
+import Data.Monoid.Factorial (factors)
+import Data.Monoid.Textual (TextualMonoid, characterPrefix, singleton)
+import Data.Monoid.Instances.ByteString.UTF8 (ByteStringUTF8(..))
+import Data.Scientific (Scientific)
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
+import Data.Vector as Vector (Vector, foldl', fromList)
+import System.Directory (getDirectoryContents)
+import System.FilePath ((</>), dropExtension)
+import qualified Data.Picoparsec as P
+import qualified Data.ByteString as B
+import qualified Data.HashMap.Strict as H
+import Criterion.Main
+
+data Result a = Error String
+              | Success a
+                deriving (Eq, Show)
+
+
+-- | A JSON \"object\" (key\/value map).
+type Object t = H.HashMap t (Value t)
+
+-- | A JSON \"array\" (sequence).
+type Array t = Vector (Value t)
+
+-- | A JSON value represented as a Haskell value.
+data Value t = Object !(Object t)
+             | Array !(Array t)
+             | String !t
+             | Number !Scientific
+             | Bool !Bool
+             | Null
+               deriving (Eq, Show)
+
+instance NFData t => NFData (Value t) where
+    rnf (Object o) = rnf o
+    rnf (Array a)  = Vector.foldl' (\x y -> rnf y `seq` x) () a
+    rnf (String s) = rnf s
+    rnf (Number n) = rnf n
+    rnf (Bool b)   = rnf b
+    rnf Null       = ()
+
+
+instance NFData ByteStringUTF8 where
+  rnf (ByteStringUTF8 b) = rnf b
+
+instance Hashable ByteStringUTF8 where
+  hashWithSalt i (ByteStringUTF8 b) = hashWithSalt i b
+  
+
+-- | Parse a top-level JSON value.  This must be either an object or
+-- an array, per RFC 4627.
+--
+-- The conversion of a parsed value to a Haskell value is deferred
+-- until the Haskell value is needed.  This may improve performance if
+-- only a subset of the results of conversions are needed, but at a
+-- cost in thunk allocation.
+json :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+json = json_ object_ array_
+{-# INLINEABLE json #-}
+
+-- | Parse a top-level JSON value.  This must be either an object or
+-- an array, per RFC 4627.
+--
+-- This is a strict version of 'json' which avoids building up thunks
+-- during parsing; it performs all conversions immediately.  Prefer
+-- this version if most of the JSON data needs to be accessed.
+json' :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+json' = json_ object_' array_'
+{-# INLINEABLE json' #-}
+
+json_ :: (Eq t, TextualMonoid t) => Parser t (Value t) -> Parser t (Value t) -> Parser t (Value t)
+json_ obj ary = do
+  w <- skipSpace *> P.satisfyChar (\c -> c == '{' || c == '[')
+  if w == '{'
+    then obj
+    else ary
+{-# INLINE json_ #-}
+
+object_ :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value
+{-# INLINEABLE object_ #-}
+
+object_' :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+object_' = {-# SCC "object_'" #-} do
+  !vals <- objectValues jstring' value'
+  return (Object vals)
+ where
+  jstring' = do
+    !s <- jstring
+    return s
+{-# INLINEABLE object_' #-}
+
+objectValues :: (Hashable t, Ord t, TextualMonoid t)
+                => Parser t t -> Parser t (Value t) -> Parser t (H.HashMap t (Value t))
+objectValues str val = do
+  skipSpace
+  let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val)
+  H.fromList <$> commaSeparated pair '}'
+{-# INLINE objectValues #-}
+
+array_ :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+array_ = {-# SCC "array_" #-} Array <$> arrayValues value
+{-# INLINEABLE array_ #-}
+
+array_' :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+array_' = {-# SCC "array_'" #-} do
+  !vals <- arrayValues value'
+  return (Array vals)
+{-# INLINEABLE array_' #-}
+
+commaSeparated :: (Eq t, TextualMonoid t) => Parser t a -> Char -> Parser t [a]
+commaSeparated item end = do
+  c <- P.peekToken
+  if c == singleton end
+    then P.anyToken >> return []
+    else loop
+  where
+    loop = do
+      v <- item <* skipSpace
+      (string "," *> skipSpace *> ((v:) <$> loop)
+       <|> char end *> pure [v])
+{-# INLINE commaSeparated #-}
+
+arrayValues :: (Eq t, TextualMonoid t) => Parser t (Value t) -> Parser t (Vector (Value t))
+arrayValues val = do
+  skipSpace
+  Vector.fromList <$> commaSeparated val ']'
+{-# INLINE arrayValues #-}
+
+-- | Parse any JSON value.  You should usually 'json' in preference to
+-- this function, as this function relaxes the object-or-array
+-- requirement of RFC 4627.
+--
+-- In particular, be careful in using this function if you think your
+-- code might interoperate with Javascript.  A na&#xef;ve Javascript
+-- library that parses JSON data using @eval@ is vulnerable to attack
+-- unless the encoded data represents an object or an array.  JSON
+-- implementations in other languages conform to that same restriction
+-- to preserve interoperability and security.
+value :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+value = do
+  c <- P.peekToken
+  case c of
+    "\""  -> P.anyToken *> (String <$> jstring_)
+    "{"   -> P.anyToken *> object_
+    "["   -> P.anyToken *> array_
+    "f"   -> string "false" *> pure (Bool False)
+    "t"   -> string "true" *> pure (Bool True)
+    "n"   -> string "null" *> pure Null
+    _      | c >= "0" && c <= "9" || c == "."
+          -> Number <$> scientific
+           | otherwise -> fail "not a valid json value"
+{-# INLINEABLE value #-}
+
+-- | Strict version of 'value'. See also 'json''.
+value' :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+value' = do
+  c <- P.peekToken
+  case c of
+    "\""  -> P.anyToken *> (String <$> jstring_)
+    "{"   -> P.anyToken *> object_'
+    "["   -> P.anyToken *> array_'
+    "f"   -> string "false" *> pure (Bool False)
+    "t"   -> string "true" *> pure (Bool True)
+    "n"   -> string "null" *> pure Null
+    _      | c >= "0" && c <= "9" || c == "."
+          -> do
+             !n <- scientific
+             return (Number n)
+           | otherwise -> fail "not a valid json value"
+{-# INLINEABLE value' #-}
+
+-- | Parse a quoted JSON string.
+jstring :: TextualMonoid t => Parser t t
+jstring = string "\"" *> jstring_
+{-# INLINEABLE jstring #-}
+
+-- | Parse a string without a leading quote.
+jstring_ :: TextualMonoid t => Parser t t
+jstring_ = {-# SCC "jstring_" #-}
+  mconcat <$> (many (P.takeTillChar1 (\c-> c == '"' || c == '\\')
+                     <|> string "\\" *> unescape)
+               <* string "\"")
+{-# INLINE jstring_ #-}
+
+unescape :: TextualMonoid t => Parser t t
+unescape = (P.satisfyChar (`elem` "\"\\/ntbrfu")
+            <|> fail "invalid JSON escape sequence")
+           >>= \c-> case c
+                    of '"' -> pure "\""
+                       '\\' -> pure "\\"
+                       '/' -> pure "/"
+                       'n' -> pure "\n"
+                       't' -> pure "\t"
+                       'b' -> pure "\b"
+                       'r' -> pure "\r"
+                       'f' -> pure "\f"
+                       'u' -> singleton <$> chr <$> hexQuad
+                       _ -> undefined
+{-# INLINE unescape #-}
+
+hexQuad :: TextualMonoid t => Parser t Int
+hexQuad = do [a, b, c, d] <- map hex <$> factors <$> P.take 4
+             if (a .|. b .|. c .|. d) /= 255
+               then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12)
+               else fail "invalid hex escape"
+  where hex n = case characterPrefix n
+                of Just w | w >= '0' && w <= '9' -> ord w - ord '0'
+                          | w >= 'a' && w <= 'f' -> ord w - (ord 'a' - 10)
+                          | w >= 'A' && w <= 'F' -> ord w - (ord 'A' - 10)
+                   _ -> 255
+{-# INLINE hexQuad #-}
+
+-- $lazy
+--
+-- The 'json' and 'value' parsers decouple identification from
+-- conversion.  Identification occurs immediately (so that an invalid
+-- JSON document can be rejected as early as possible), but conversion
+-- to a Haskell value is deferred until that value is needed.
+--
+-- This decoupling can be time-efficient if only a smallish subset of
+-- elements in a JSON value need to be inspected, since the cost of
+-- conversion is zero for uninspected elements.  The trade off is an
+-- increase in memory usage, due to allocation of thunks for values
+-- that have not yet been converted.
+
+-- $strict
+--
+-- The 'json'' and 'value'' parsers combine identification with
+-- conversion.  They consume more CPU cycles up front, but have a
+-- smaller memory footprint.
+
+skipSpace :: TextualMonoid t => Parser t ()
+skipSpace = P.skipCharsWhile isSpace
+{-# INLINE skipSpace #-}
+
+-- | Parse a top-level JSON value followed by optional whitespace and
+-- end-of-input.  See also: 'json'.
+jsonEOF :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+jsonEOF = json <* skipSpace <* endOfInput
+{-# INLINEABLE jsonEOF #-}
+
+-- | Parse a top-level JSON value followed by optional whitespace and
+-- end-of-input.  See also: 'json''.
+jsonEOF' :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+jsonEOF' = json' <* skipSpace <* endOfInput
+{-# SPECIALIZE jsonEOF' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-}
+{-# SPECIALIZE jsonEOF' :: Parser T.Text (Value T.Text) #-}
+
+aeson :: IO Benchmark
+aeson = do
+  let path = "json-data"
+  names <- sort . filter (`notElem` [".", ".."]) <$> getDirectoryContents path
+  benches1 <- forM names $ \name -> do
+    bs <- B.readFile (path </> name)
+    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') $ ByteStringUTF8 bs
+  benches2 <- forM names $ \name -> do
+    t <- T.readFile (path </> name)
+    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') t
+  return $ bgroup "picoparsec-aeson"
+    [ bgroup "ByteStringUTF8" benches1
+    , bgroup "Text"           benches2 ]
   ghc-options: -O2 -Wall
   main-is: Benchmarks.hs
   other-modules:
+    AttoAeson
+    PicoAeson
     HeadersByteString
     HeadersText
     Links
     deepseq >= 1.1,
     directory,
     filepath,
+    hashable,
     monoid-subclasses < 0.4,
     parsec >= 3.1.2,
     picoparsec,