1. Mario Blažević
  2. picoparsec

Commits

Mario Blažević  committed 2e61460

Fixed the Aeson benhmarks.

  • Participants
  • Parent commits 6f31255
  • Branches default

Comments (0)

Files changed (1)

File benchmarks/PicoAeson.hs

View file
 import Data.Hashable (Hashable(..))
 import Data.List (sort)
 import Data.Monoid (mconcat)
-import Data.Monoid.Factorial (factors)
-import Data.Monoid.Textual (TextualMonoid, characterPrefix, singleton)
+import Data.Monoid.Textual (TextualMonoid, singleton)
+import qualified Data.Monoid.Textual as Textual
 import Data.Monoid.Instances.ByteString.UTF8 (ByteStringUTF8(..))
 import Data.Scientific (Scientific)
 import qualified Data.Text as T
 -- until the Haskell value is needed.  This may improve performance if
 -- only a subset of the results of conversions are needed, but at a
 -- cost in thunk allocation.
-json :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+json :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
 json = json_ object_ array_
 {-# INLINEABLE json #-}
 
 -- This is a strict version of 'json' which avoids building up thunks
 -- during parsing; it performs all conversions immediately.  Prefer
 -- this version if most of the JSON data needs to be accessed.
-json' :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+json' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
 json' = json_ object_' array_'
 {-# INLINEABLE json' #-}
 
   where
     loop = do
       v <- item <* skipSpace
-      (string "," *> skipSpace *> ((v:) <$> loop)
-       <|> char end *> pure [v])
+      ch <- P.satisfyChar $ \w -> w == ',' || w == end
+      if ch == ','
+        then skipSpace >> (v:) <$> loop
+        else return [v]
 {-# INLINE commaSeparated #-}
 
 arrayValues :: (Eq t, TextualMonoid t) => Parser t (Value t) -> Parser t (Vector (Value t))
 value' = do
   c <- P.peekChar
   case c of
-    '"'   -> P.anyToken *> (String <$> jstring_)
+    '"'   -> do
+             !s <- P.anyToken *> jstring_
+             return (String s)
     '{'   -> P.anyToken *> object_'
     '['   -> P.anyToken *> array_'
     'f'   -> string "false" *> pure (Bool False)
 
 -- | Parse a quoted JSON string.
 jstring :: TextualMonoid t => Parser t t
-jstring = string "\"" *> jstring_
+jstring = char '"' *> jstring_
 {-# INLINEABLE jstring #-}
 
 -- | Parse a string without a leading quote.
 unescape :: TextualMonoid t => Parser t t
 unescape = (P.satisfyChar (`elem` "\"\\/ntbrfu")
             <|> fail "invalid JSON escape sequence")
-           >>= \c-> case c
+           >>= \e-> case e
                     of '"' -> pure "\""
                        '\\' -> pure "\\"
                        '/' -> pure "/"
                        'b' -> pure "\b"
                        'r' -> pure "\r"
                        'f' -> pure "\f"
-                       'u' -> singleton <$> chr <$> hexQuad
-                       _ -> undefined
+                       'u' -> do a <- hexQuad
+                                 if a < 0xd800 || a > 0xdfff
+                                   then pure (singleton $ chr a)
+                                   else do b <- P.string "\\u" *> hexQuad
+                                           if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff
+                                             then let !c = ((a - 0xd800) `shiftL` 10) + (b - 0xdc00) + 0x10000
+                                                  in pure (singleton $ chr c)
+                                             else fail "invalid UTF-16 surrogates"
+                       _ -> fail "invalid JSON escape sequence"
 {-# INLINE unescape #-}
 
 hexQuad :: TextualMonoid t => Parser t Int
-hexQuad = do [a, b, c, d] <- map hex <$> factors <$> P.take 4
-             if (a .|. b .|. c .|. d) /= 255
-               then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12)
-               else fail "invalid hex escape"
-  where hex n = case characterPrefix n
-                of Just w | w >= '0' && w <= '9' -> ord w - ord '0'
-                          | w >= 'a' && w <= 'f' -> ord w - (ord 'a' - 10)
-                          | w >= 'A' && w <= 'F' -> ord w - (ord 'A' - 10)
-                   _ -> 255
+hexQuad = do s <- P.take 4
+             let q = Textual.foldl' (const $ const (-1)) extend 0 s :: Int
+             if q < 0 then fail "invalid hex escape" else return q
+  where extend n c = n `shiftL` 4 .|. hex c
+        hex c | c >= '0' && c <= '9' = ord c - ord '0'
+              | c >= 'a' && c <= 'f' = ord c - (ord 'a' - 10)
+              | c >= 'A' && c <= 'F' = ord c - (ord 'A' - 10)
+              | otherwise = -1
 {-# INLINE hexQuad #-}
 
 -- $lazy
 
 -- | Parse a top-level JSON value followed by optional whitespace and
 -- end-of-input.  See also: 'json'.
-jsonEOF :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+jsonEOF :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
 jsonEOF = json <* skipSpace <* endOfInput
 {-# INLINEABLE jsonEOF #-}
 
 -- | Parse a top-level JSON value followed by optional whitespace and
 -- end-of-input.  See also: 'json''.
-jsonEOF' :: (Hashable t, Ord t, TextualMonoid t) => Parser t (Value t)
+jsonEOF' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
 jsonEOF' = json' <* skipSpace <* endOfInput
 {-# SPECIALIZE jsonEOF' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-}
 {-# SPECIALIZE jsonEOF' :: Parser T.Text (Value T.Text) #-}