Commits

Bryan O'Sullivan  committed b2dea00 Merge

Merge

  • Participants
  • Parent commits 9b1d7fc, 0d302a5

Comments (0)

Files changed (7)

File Data/Aeson/Encode.hs

-{-# LANGUAGE BangPatterns, OverloadedStrings #-}
+{-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-}
 
 -- |
 -- Module:      Data.Aeson.Encode
 -- Most frequently, you'll probably want to encode straight to UTF-8
 -- (the standard JSON encoding) using 'encode'.
 --
--- You can convert a 'Builder' (as returned by 'fromValue') to a
--- string using e.g. 'toLazyText'.
+-- You can use the conversions to 'Builder's when embedding JSON messages as
+-- parts of a protocol.
+module Data.Aeson.Encode
+    ( encode
 
-module Data.Aeson.Encode
-    (
-      fromValue
-    , encode
+#if MIN_VERSION_bytestring(0,10,4)
+    -- * Encoding to Builders
+    , encodeToByteStringBuilder
+    , encodeToTextBuilder
+#else
+    -- * Encoding to Text Builders
+    , encodeToTextBuilder
+#endif
+
+    -- * Deprecated
+    , fromValue
     ) where
 
-import Data.Aeson.Types (ToJSON(..), Value(..))
+import Data.Aeson.Types (Value(..))
 import Data.Monoid (mappend)
 import Data.Scientific (Scientific, coefficient, base10Exponent, scientificBuilder)
 import Data.Text.Lazy.Builder
 import Data.Text.Lazy.Builder.Int (decimal)
-import Data.Text.Lazy.Encoding (encodeUtf8)
 import Numeric (showHex)
-import qualified Data.ByteString.Lazy as L
 import qualified Data.HashMap.Strict as H
 import qualified Data.Text as T
 import qualified Data.Vector as V
 
--- | Encode a JSON value to a 'Builder'.  You can convert this to a
--- string using e.g. 'toLazyText', or encode straight to UTF-8 (the
--- standard JSON encoding) using 'encode'.
+#if MIN_VERSION_bytestring(0,10,4)
+import Data.Aeson.Encode.ByteString (encode, encodeToByteStringBuilder)
+#else
+import Data.Aeson.Types (ToJSON(toJSON))
+import qualified Data.ByteString.Lazy    as BL
+import qualified Data.Text.Lazy.Builder  as TLB
+import qualified Data.Text.Lazy.Encoding as TLE
+
+-- | Encode a JSON 'Value' as a UTF-8 encoded 'BL.ByteString'.
+encode :: ToJSON a => a -> BL.ByteString
+encode = TLE.encodeUtf8 . TLB.toLazyText . encodeToTextBuilder . toJSON
+#endif
+
+-- | Encode a JSON 'Value' to a 'Builder', which can be embedded efficiently
+-- in a text-based protocol.
+encodeToTextBuilder :: Value -> Builder
+encodeToTextBuilder =
+    go
+  where
+    go Null       = {-# SCC "go/Null" #-} "null"
+    go (Bool b)   = {-# SCC "go/Bool" #-} if b then "true" else "false"
+    go (Number s) = {-# SCC "go/Number" #-} fromScientific s
+    go (String s) = {-# SCC "go/String" #-} string s
+    go (Array v)
+        | V.null v = {-# SCC "go/Array" #-} "[]"
+        | otherwise = {-# SCC "go/Array" #-}
+                      singleton '[' <>
+                      go (V.unsafeHead v) <>
+                      V.foldr f (singleton ']') (V.unsafeTail v)
+      where f a z = singleton ',' <> go a <> z
+    go (Object m) = {-# SCC "go/Object" #-}
+        case H.toList m of
+          (x:xs) -> singleton '{' <> one x <> foldr f (singleton '}') xs
+          _      -> "{}"
+      where f a z     = singleton ',' <> one a <> z
+            one (k,v) = string k <> singleton ':' <> go v
+
+{-# DEPRECATED fromValue "Use 'encodeToTextBuilder' instead" #-}
 fromValue :: Value -> Builder
-fromValue Null = {-# SCC "fromValue/Null" #-} "null"
-fromValue (Bool b) = {-# SCC "fromValue/Bool" #-}
-                     if b then "true" else "false"
-fromValue (Number s) = {-# SCC "fromValue/Number" #-} fromScientific s
-fromValue (String s) = {-# SCC "fromValue/String" #-} string s
-fromValue (Array v)
-    | V.null v = {-# SCC "fromValue/Array" #-} "[]"
-    | otherwise = {-# SCC "fromValue/Array" #-}
-                  singleton '[' <>
-                  fromValue (V.unsafeHead v) <>
-                  V.foldr f (singleton ']') (V.unsafeTail v)
-  where f a z = singleton ',' <> fromValue a <> z
-fromValue (Object m) = {-# SCC "fromValue/Object" #-}
-    case H.toList m of
-      (x:xs) -> singleton '{' <> one x <> foldr f (singleton '}') xs
-      _      -> "{}"
-  where f a z     = singleton ',' <> one a <> z
-        one (k,v) = string k <> singleton ':' <> fromValue v
+fromValue = encodeToTextBuilder
 
 string :: T.Text -> Builder
 string s = {-# SCC "string" #-} singleton '"' <> quote s <> singleton '"'
   where
     e = base10Exponent s
 
--- | Efficiently serialize a JSON value as a lazy 'L.ByteString'.
-encode :: ToJSON a => a -> L.ByteString
-encode = {-# SCC "encode" #-} encodeUtf8 . toLazyText . fromValue .
-         {-# SCC "toJSON" #-} toJSON
-{-# INLINE encode #-}
-
 (<>) :: Builder -> Builder -> Builder
 (<>) = mappend
 {-# INLINE (<>) #-}

File Data/Aeson/Encode/ByteString.hs

+{-# LANGUAGE BangPatterns, OverloadedStrings #-}
+
+-- |
+-- Module:      Data.Aeson.EncodeUtf8
+-- Copyright:   (c) 2011 MailRank, Inc.
+--              (c) 2013 Simon Meier <iridcode@gmail.com>
+-- License:     Apache
+-- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
+-- Stability:   experimental
+-- Portability: portable
+--
+-- Efficiently serialize a JSON value using the UTF-8 encoding.
+
+module Data.Aeson.Encode.ByteString
+    ( encode
+    , encodeToByteStringBuilder
+    ) where
+
+import Prelude hiding (null)
+import Data.Aeson.Types (ToJSON(..), Value(..))
+import Data.Char (ord)
+import Data.Scientific (Scientific, coefficient, base10Exponent, formatScientific, FPFormat(Generic))
+import Data.Word (Word8)
+import Data.Monoid (mappend)
+import           Data.ByteString.Builder      as B
+import           Data.ByteString.Builder.Prim as BP
+import qualified Data.ByteString.Lazy as L
+import qualified Data.HashMap.Strict as HMS
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
+import qualified Data.Vector as V
+
+(<>) :: Builder -> Builder -> Builder
+(<>) = mappend
+{-# INLINE (<>) #-}
+infixr 6 <>
+
+-- | Efficiently serialize a JSON value as a lazy 'L.ByteString'.
+encode :: ToJSON a => a -> L.ByteString
+encode = B.toLazyByteString . encodeToByteStringBuilder . toJSON
+
+-- | Encode a JSON value to a ByteString 'B.Builder'. Use this function if you
+-- must prepend or append further bytes to the encoded JSON value.
+encodeToByteStringBuilder :: Value -> Builder
+encodeToByteStringBuilder Null       = null
+encodeToByteStringBuilder (Bool b)   = bool b
+encodeToByteStringBuilder (Number n) = number n
+encodeToByteStringBuilder (String s) = string s
+encodeToByteStringBuilder (Array v)  = array v
+encodeToByteStringBuilder (Object m) = object m
+
+null :: Builder
+null = BP.primBounded (ascii4 ('n',('u',('l','l')))) ()
+
+bool :: Bool -> Builder
+bool = BP.primBounded (BP.condB id (ascii4 ('t',('r',('u','e'))))
+                                   (ascii5 ('f',('a',('l',('s','e'))))))
+
+array :: V.Vector Value -> Builder
+array v
+  | V.null v  = B.char8 '[' <> B.char8 ']'
+  | otherwise = B.char8 '[' <>
+                encodeToByteStringBuilder (V.unsafeHead v) <>
+                V.foldr withComma (B.char8 ']') (V.unsafeTail v)
+  where
+    withComma a z = B.char8 ',' <> encodeToByteStringBuilder a <> z
+
+object :: HMS.HashMap T.Text Value -> Builder
+object m = case HMS.toList m of
+    (x:xs) -> B.char8 '{' <> one x <> foldr withComma (B.char8 '}') xs
+    _      -> B.char8 '{' <> B.char8 '}'
+  where
+    withComma a z = B.char8 ',' <> one a <> z
+    one (k,v)     = string k <> B.char8 ':' <> encodeToByteStringBuilder v
+
+string :: T.Text -> B.Builder
+string t =
+    B.char8 '"' <> TE.encodeUtf8BuilderEscaped escapeAscii t <> B.char8 '"'
+  where
+    escapeAscii :: BP.BoundedPrim Word8
+    escapeAscii =
+        BP.condB (== c2w '\\'  ) (ascii2 ('\\','\\')) $
+        BP.condB (== c2w '\"'  ) (ascii2 ('\\','"' )) $
+        BP.condB (>= c2w '\x20') (BP.liftFixedToBounded BP.word8) $
+        BP.condB (== c2w '\n'  ) (ascii2 ('\\','n' )) $
+        BP.condB (== c2w '\r'  ) (ascii2 ('\\','r' )) $
+        BP.condB (== c2w '\t'  ) (ascii2 ('\\','t' )) $
+        (BP.liftFixedToBounded hexEscape) -- fallback for chars < 0x20
+
+    c2w = fromIntegral . ord
+
+    hexEscape :: BP.FixedPrim Word8
+    hexEscape = (\c -> ('\\', ('u', fromIntegral c))) BP.>$<
+        BP.char8 BP.>*< BP.char8 BP.>*< BP.word16HexFixed
+
+number :: Scientific -> Builder
+number s
+    | e < 0     = B.string8 $ formatScientific Generic Nothing s
+    | otherwise = B.integerDec (coefficient s * 10 ^ e)
+  where
+    e = base10Exponent s
+
+
+{-# INLINE ascii2 #-}
+ascii2 :: (Char, Char) -> BP.BoundedPrim a
+ascii2 cs = BP.liftFixedToBounded $ (const cs) BP.>$< BP.char7 BP.>*< BP.char7
+
+{-# INLINE ascii4 #-}
+ascii4 :: (Char, (Char, (Char, Char))) -> BP.BoundedPrim a
+ascii4 cs = BP.liftFixedToBounded $ (const cs) >$<
+    BP.char7 BP.>*< BP.char7 BP.>*< BP.char7 BP.>*< BP.char7
+
+{-# INLINE ascii5 #-}
+ascii5 :: (Char, (Char, (Char, (Char, Char)))) -> BP.BoundedPrim a
+ascii5 cs = BP.liftFixedToBounded $ (const cs) >$<
+    BP.char7 BP.>*< BP.char7 BP.>*< BP.char7 BP.>*< BP.char7 BP.>*< BP.char7

File Data/Aeson/Types/Internal.hs

            | Number !Scientific
            | Bool !Bool
            | Null
-             deriving (Eq, Show, Typeable, Data)
+             deriving (Eq, Show, Typeable)
 
 -- | A newtype wrapper for 'UTCTime' that uses the same non-standard
 -- serialization format as Microsoft .NET, whose @System.DateTime@
   description: Use blaze-builder instead of bytestring >= 0.10
   default: False
 
+flag new-bytestring-builder
+  description: Use the new bytestring builder available in bytestring >= 0.10.4.0 
+  default: False
+
 library
   exposed-modules:
     Data.Aeson
     Data.Aeson.Types.Instances
     Data.Aeson.Types.Internal
 
+  if flag(new-bytestring-builder)
+    other-modules: Data.Aeson.Encode.ByteString
+    build-depends: bytestring >= 0.10.4.0,
+                   text >= 1.1.0.0 
+  else 
+    build-depends: bytestring < 0.10.4.0,
+                   text >= 0.11.1.0
+
   if impl(ghc >= 7.2.1)
     cpp-options: -DGENERICS
     build-depends: ghc-prim >= 0.2, dlist >= 0.2
   build-depends:
     attoparsec >= 0.11.1.0,
     base == 4.*,
-    bytestring,
     containers,
     deepseq,
     hashable >= 1.1.2.0,
     old-locale,
     syb,
     template-haskell >= 2.4,
-    text >= 0.11.1.0,
     time,
     unordered-containers >= 0.1.3.0,
     vector >= 0.7.1,
     template-haskell,
     test-framework,
     test-framework-quickcheck2,
+    test-framework-hunit,
+    HUnit,
     text,
     time,
     unordered-containers,

File benchmarks/CompareWithJSON.hs

 import Blaze.ByteString.Builder.Char.Utf8 (fromString)
 import Control.DeepSeq (NFData(rnf))
 import Criterion.Main
+import qualified Data.Aeson.Encode as A
 import qualified Data.Aeson as A
 import qualified Data.ByteString.Lazy as BL
 import qualified Text.JSON as J
+import qualified Data.Text.Lazy          as TL
+import qualified Data.Text.Lazy.Builder  as TLB
+import qualified Data.Text.Lazy.Encoding as TLE
 
 #if !MIN_VERSION_bytestring(0,10,0)
 import qualified Data.ByteString.Lazy.Internal as BL
 encodeJ :: J.JSValue -> BL.ByteString
 encodeJ = toLazyByteString . fromString . J.encode
 
+encodeToText :: A.Value -> TL.Text
+encodeToText = TLB.toLazyText . A.encodeToTextBuilder . A.toJSON
+
+encodeViaText :: A.Value -> BL.ByteString
+encodeViaText = TLE.encodeUtf8 . encodeToText
+
 main :: IO ()
 main = do
   let enFile = "json-data/twitter100.json"
       ]
     , bgroup "encode" [
         bgroup "en" [
-          bench "aeson" $ nf A.encode (decodeA enA)
+          bench "aeson-to-bytestring" $ nf A.encode (decodeA enA)
+        , bench "aeson-via-text-to-bytestring" $ nf encodeViaText (decodeA enA)
+        , bench "aeson-to-text" $ nf encodeToText (decodeA enA)
         , bench "json"  $ nf encodeJ (decodeJ enJ)
         ]
       , bgroup "jp" [
-          bench "aeson" $ nf A.encode (decodeA jpA)
+          bench "aeson-to-bytestring" $ nf A.encode (decodeA jpA)
+        , bench "aeson-via-text-to-bytestring" $ nf encodeViaText (decodeA jpA)
+        , bench "aeson-to-text" $ nf encodeToText (decodeA jpA)
         , bench "json"  $ nf encodeJ (decodeJ jpJ)
         ]
       ]

File benchmarks/aeson-benchmarks.cabal

     blaze-builder,
     bytestring,
     criterion,
-    json
+    json,
+    text
 
 executable aeson-benchmark-aeson-encode
   main-is: AesonEncode.hs

File tests/Properties.hs

 {-# LANGUAGE CPP, OverloadedStrings, RecordWildCards, ScopedTypeVariables #-}
 
+import Control.Monad (forM)
+import Data.Aeson (eitherDecode)
 import Data.Aeson.Encode
 import Data.Aeson.Parser (value)
 import Data.Aeson.Types
 import Test.Framework (Test, defaultMain, testGroup)
 import Test.Framework.Providers.QuickCheck2 (testProperty)
+import Test.Framework.Providers.HUnit (testCase)
+import Test.HUnit                     (assertFailure, assertEqual)
 import Test.QuickCheck (Arbitrary(..))
 import qualified Data.Vector as V
 import qualified Data.Attoparsec.Lazy as L
 import qualified Data.ByteString.Lazy.Char8 as L
 import qualified Data.Text as T
+import qualified Data.Text.Lazy.Builder as TLB
+import qualified Data.Text.Lazy.Encoding as TLE
 import qualified Data.HashMap.Strict as H
 import Data.Time.Clock (UTCTime(..))
 import Data.Time (ZonedTime(..))
 import qualified Data.Map as Map
 #endif
 
+{-
 roundTripCaml :: String -> Bool
 roundTripCaml s = s == (camlFrom '_' $ camlTo '_' s)
   where
     camlFrom :: Char -> String -> String
     camlFrom c = concatMap capitalize $ split c
+-}
 
 encodeDouble :: Double -> Double -> Bool
 encodeDouble num denom
     result = parse parser ()
 
 main :: IO ()
-main = defaultMain tests
+main = do
+    comparisonTest <- encoderComparisonTests
+    defaultMain (comparisonTest : tests)
 
 #ifdef GHC_GENERICS
 type P6 = Product6 Int Bool String (Approx Double) (Int, Approx Double) ()
       testProperty "encodeDouble" encodeDouble
     , testProperty "encodeInteger" encodeInteger
     ],
-  testGroup "camlCase" [
-      testProperty "camlTo" $ roundTripCaml "AnApiMethod"
-    , testProperty "camlTo" $ roundTripCaml "anotherMethodType"
-    ],
+  -- testGroup "camlCase" [
+  --     testProperty "camlTo" $ roundTripCaml "AnApiMethod"
+  --   , testProperty "camlTo" $ roundTripCaml "anotherMethodType"
+  --   ],
   testGroup "roundTrip" [
       testProperty "Bool" $ roundTripEq True
     , testProperty "Double" $ roundTripEq (1 :: Approx Double)
     ]
 #endif
   ]
+
+
+------------------------------------------------------------------------------
+-- Comparison between bytestring and text encoders
+------------------------------------------------------------------------------
+
+encoderComparisonTests :: IO Test
+encoderComparisonTests = do
+    encoderTests <- forM testFiles $ \file0 -> do
+        let file = "benchmarks/json-data/" ++ file0
+        return $ testCase file $ do
+            inp <- L.readFile file
+            case eitherDecode inp of
+              Left  err -> assertFailure $ "Decoding failure: " ++ err
+              Right val -> assertEqual "" (encode val) (encodeViaText val)
+    return $ testGroup "Compare bytestring and text encoders" encoderTests
+  where
+    encodeViaText :: Value -> L.ByteString
+    encodeViaText =
+        TLE.encodeUtf8 . TLB.toLazyText . encodeToTextBuilder . toJSON
+
+    testFiles =
+      [ "example.json"
+      , "integers.json"
+      , "jp100.json"
+      , "numbers.json"
+      , "twitter10.json"
+      , "twitter20.json"
+      , "geometry.json"
+      , "jp10.json"
+      , "jp50.json"
+      , "twitter1.json"
+      , "twitter100.json"
+      , "twitter50.json"
+      ]