Bryan O'Sullivan avatar Bryan O'Sullivan committed bbac11d

Escape and unescape potentially unsafe names.

This is needed to maintain compatibility with REST clients, as unfortunately
the Riak server doesn't do the escaping and unescaping itself.

Comments (0)

Files changed (7)

 name:                riak
-version:             0.3.0.2
+version:             0.3.1.0
 synopsis:            A Haskell client for the Riak decentralized data store
 description:
   A Haskell client library for the Riak decentralized data
     Network.Riak.Content
     Network.Riak.Debug
     Network.Riak.Escape
+    Network.Riak.Functions
     Network.Riak.JSON
     Network.Riak.JSON.Resolvable
     Network.Riak.Request
     random,
     riak-protobuf >= 0.14.0.0,
     stm,
+    text >= 0.11.0.6,
     time,
     vector >= 0.7
 

src/Network/Riak/Basic.hs

     ) where
 
 import Control.Applicative ((<$>))
+import Data.Maybe (fromMaybe)
 import Network.Riak.Connection.Internal
+import Network.Riak.Escape (unescape)
 import Network.Riak.Protocol.BucketProps
 import Network.Riak.Protocol.Content
 import Network.Riak.Protocol.ListKeysResponse
 listBuckets :: Connection -> IO (Seq.Seq T.Bucket)
 listBuckets conn = Resp.listBuckets <$> exchange conn Req.listBuckets
 
--- Fold over the buckets in the cluster.
+-- Fold over the keys in a bucket.
 --
 -- /Note/: this operation is expensive.  Do not use it in production.
 foldKeys :: Connection -> T.Bucket -> (a -> Key -> IO a) -> a -> IO a
 foldKeys conn bucket f z0 = do
   sendRequest conn $ Req.listKeys bucket
-  let loop z = do
+  let g z = f z . unescape
+      loop z = do
         ListKeysResponse{..} <- recvResponse conn
-        z1 <- F.foldlM f z keys
-        if maybe False id done
+        z1 <- F.foldlM g z keys
+        if fromMaybe False done
           then return z1
           else loop z1
   loop z0

src/Network/Riak/Escape.hs

-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleInstances, OverloadedStrings #-}
 
 -- |
 -- Module:      Network.Riak.Connection
 
 module Network.Riak.Escape
     (
-      escape
+      Escape(..)
     , unescape
     ) where
 
-import Blaze.ByteString.Builder (fromByteString, toByteString)
+import Blaze.ByteString.Builder (Builder, fromByteString, toByteString, toLazyByteString)
 import Blaze.ByteString.Builder.Word (fromWord8)
 import Control.Applicative ((<$>))
 import Data.Attoparsec as A
+import Data.Attoparsec.Lazy as AL
 import Data.Bits ((.|.), (.&.), shiftL, shiftR)
 import Data.ByteString (ByteString)
 import Data.Monoid (mappend, mempty)
+import Data.Text (Text)
+import Data.Word (Word8)
+import Network.Riak.Functions (mapEither)
 import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
 import qualified Data.ByteString.Unsafe as B
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TL
 
--- | URL-escape a string.
-escape :: ByteString -> ByteString
-escape = toByteString . B.foldl step mempty
+-- | The class of string-like types that can be URL-escaped and
+-- unescaped.
+class Escape e where
+    -- | URL-escape a string.
+    escape :: e -> L.ByteString
+    -- | URL-unescape a string.
+    unescape' :: L.ByteString -> Either String e
+
+-- | URL-unescape a string that is presumed to be properly escaped.
+-- If the string is invalid, an error will be thrown that cannot be
+-- caught from pure code.
+unescape :: Escape e => L.ByteString -> e
+unescape bs = case unescape' bs of
+                Left err -> error $ "Network.Riak.Escape.unescape: " ++ err
+                Right v  -> v
+{-# INLINE unescape #-}
+
+instance Escape ByteString where
+    escape = toLazyByteString . B.foldl escapeWord8 mempty
+    {-# INLINE escape #-}
+    unescape' = AL.eitherResult . AL.parse (toByteString <$> unescapeBS)
+    {-# INLINE unescape' #-}
+
+instance Escape L.ByteString where
+    escape = toLazyByteString . L.foldl escapeWord8 mempty
+    {-# INLINE escape #-}
+    unescape' = AL.eitherResult . AL.parse (toLazyByteString <$> unescapeBS)
+    {-# INLINE unescape' #-}
+
+instance Escape Text where
+    escape = escape . T.encodeUtf8
+    {-# INLINE escape #-}
+    unescape' lbs = case AL.parse (toByteString <$> unescapeBS) lbs of
+                     AL.Done _ bs    -> mapEither show id $ T.decodeUtf8' bs
+                     AL.Fail _ _ err -> Left err
+    {-# INLINE unescape' #-}
+
+instance Escape TL.Text where
+    escape = escape . TL.encodeUtf8
+    {-# INLINE escape #-}
+    unescape' lbs = case AL.parse (toLazyByteString <$> unescapeBS) lbs of
+                     AL.Done _ bs    -> mapEither show id $ TL.decodeUtf8' bs
+                     AL.Fail _ _ err -> Left err
+    {-# INLINE unescape' #-}
+
+instance Escape [Char] where
+    escape = escape . T.encodeUtf8 . T.pack
+    {-# INLINE escape #-}
+    unescape' = mapEither id T.unpack . unescape'
+    {-# INLINE unescape' #-}
+
+-- | URL-escape a byte from a bytestring.
+escapeWord8 :: Builder -> Word8 -> Builder
+escapeWord8 acc 32 = acc `mappend` fromWord8 43
+escapeWord8 acc i
+    | literal i = acc `mappend` fromWord8 i
+    | otherwise = acc `mappend` hex i
   where
-    step acc 32 = acc `mappend` fromWord8 43
-    step acc w | literal w = acc `mappend` fromWord8 w
-               | otherwise = acc `mappend` hex w
     literal w = w >= 97 && w <= 122 || w >= 65 && w <= 90 ||
                 w >= 48 && w <= 57 || w `B.elem` "$-.!*'(),"
     hex w = fromWord8 37 `mappend` d (w `shiftR` 4) `mappend` d (w .&. 0xf)
     d n | n < 10    = fromWord8 (n + 48)
         | otherwise = fromWord8 (n + 87)
+{-# INLINE escapeWord8 #-}
 
--- | URL-unescape a string.
-unescapeP :: Parser ByteString
-unescapeP = toByteString <$> go mempty
+-- | URL-unescape' a bytestring.
+unescapeBS :: Parser Builder
+unescapeBS = go mempty
   where
     go acc  = do
       s <- A.takeWhile $ \w -> w /= 37 && w /= 43
       if done
         then return (acc `mappend` fromByteString s)
         else rest
-
--- | URL-unescape a string.
-unescape :: ByteString -> Either String ByteString
-unescape s0 = eitherResult $ parse unescapeP s0 `feed` B.empty

src/Network/Riak/Functions.hs

+-- |
+-- Module:      Network.Riak.Functions
+-- Copyright:   (c) 2011 MailRank, Inc.
+-- License:     Apache
+-- Maintainer:  Bryan O'Sullivan <bos@mailrank.com>
+-- Stability:   experimental
+-- Portability: portable
+--
+-- Useful functions.
+
+module Network.Riak.Functions
+    (
+      strict
+    , lazy
+    , mapEither
+    ) where
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString.Lazy.Internal as L
+
+strict :: L.ByteString -> B.ByteString
+strict = B.concat . L.toChunks
+{-# INLINE strict #-}
+
+lazy :: B.ByteString -> L.ByteString
+lazy s | B.null s  = L.Empty
+       | otherwise = L.Chunk s L.Empty
+{-# INLINE lazy #-}
+
+mapEither :: (a -> c) -> (b -> d) -> Either a b -> Either c d
+mapEither f _ (Left l)  = Left (f l)
+mapEither _ g (Right r) = Right (g r)
+{-# INLINE mapEither #-}

src/Network/Riak/Request.hs

 {-# LANGUAGE OverloadedStrings #-}
 
+-- |
+-- Module:      Network.Riak.Request
+-- Copyright:   (c) 2011 MailRank, Inc.
+-- License:     Apache
+-- Maintainer:  Bryan O'Sullivan <bos@mailrank.com>
+-- Stability:   experimental
+-- Portability: portable
+--
+-- Smart constructors for Riak types.  These functions correctly
+-- URL-escape bucket, key, and link names.  You should thus use them
+-- in preference to the raw data constructors.
+
 module Network.Riak.Request
     (
     -- * Connection management
     , Del.DeleteRequest
     , delete
     -- * Metadata
+    , Link.Link
+    , link
     , ListBucketsRequest
     , listBuckets
     , Keys.ListKeysRequest
 import Network.Riak.Protocol.MapReduceRequest
 import Network.Riak.Protocol.PingRequest
 import Network.Riak.Types.Internal hiding (MessageTag(..))
+import Network.Riak.Escape (escape)
 import qualified Network.Riak.Protocol.DeleteRequest as Del
+import qualified Network.Riak.Protocol.Link as Link
 import qualified Network.Riak.Protocol.GetBucketRequest as GetBucket
 import qualified Network.Riak.Protocol.GetRequest as Get
 import qualified Network.Riak.Protocol.ListKeysRequest as Keys
 import qualified Network.Riak.Protocol.PutRequest as Put
 import qualified Network.Riak.Protocol.SetBucketRequest as SetBucket
 
+-- | Create a ping request.
 ping :: PingRequest
 ping = PingRequest
 {-# INLINE ping #-}
 
+-- | Create a client-ID request.
 getClientID :: GetClientIDRequest
 getClientID = GetClientIDRequest
 {-# INLINE getClientID #-}
 
+-- | Create a server-info request.
 getServerInfo :: GetServerInfoRequest
 getServerInfo = GetServerInfoRequest
 {-# INLINE getServerInfo #-}
 
+-- | Create a get request.  The bucket and key names are URL-escaped.
 get :: Bucket -> Key -> R -> Get.GetRequest
-get bucket key r = Get.GetRequest { Get.bucket = bucket
-                                  , Get.key = key
+get bucket key r = Get.GetRequest { Get.bucket = escape bucket
+                                  , Get.key = escape key
                                   , Get.r = fromQuorum r }
 {-# INLINE get #-}
 
+-- | Create a put request.  The bucket and key names are URL-escaped.
+-- Any 'Link' values inside the 'Content' are assumed to have been
+-- constructed with the 'link' function, and hence /not/ escaped.
 put :: Bucket -> Key -> Maybe VClock -> Content -> W -> DW -> Bool
     -> Put.PutRequest
 put bucket key mvclock cont mw mdw returnBody =
-    Put.PutRequest bucket key (fromVClock <$> mvclock) cont
+    Put.PutRequest (escape bucket) (escape key) (fromVClock <$> mvclock) cont
                    (fromQuorum mw) (fromQuorum mdw) (Just returnBody)
 {-# INLINE put #-}
 
+-- | Create a link.  The bucket and key names are URL-escaped.
+link :: Bucket -> Key -> Tag -> Link.Link
+link bucket key = Link.Link (Just (escape bucket)) (Just (escape key)) . Just
+{-# INLINE link #-}
+
+-- | Create a delete request.  The bucket and key names are URL-escaped.
 delete :: Bucket -> Key -> RW -> Del.DeleteRequest
-delete bucket key rw = Del.DeleteRequest bucket key (fromQuorum rw)
+delete bucket key rw = Del.DeleteRequest (escape bucket) (escape key)
+                                         (fromQuorum rw)
 {-# INLINE delete #-}
 
+-- | Create a list-buckets request.
 listBuckets :: ListBucketsRequest
 listBuckets = ListBucketsRequest
 {-# INLINE listBuckets #-}
 
+-- | Create a list-keys request.  The bucket name is URL-escaped.
 listKeys :: Bucket -> Keys.ListKeysRequest
-listKeys = Keys.ListKeysRequest
+listKeys = Keys.ListKeysRequest . escape
 {-# INLINE listKeys #-}
 
+-- | Create a get-bucket request.  The bucket name is URL-escaped.
 getBucket :: Bucket -> GetBucket.GetBucketRequest
-getBucket bucket = GetBucket.GetBucketRequest bucket
+getBucket = GetBucket.GetBucketRequest . escape
 {-# INLINE getBucket #-}
 
+-- | Create a set-bucket request.  The bucket name is URL-escaped.
 setBucket :: Bucket -> BucketProps -> SetBucket.SetBucketRequest
-setBucket bucket props = SetBucket.SetBucketRequest bucket props
+setBucket = SetBucket.SetBucketRequest . escape
 {-# INLINE setBucket #-}
 
+-- | Create a map-reduce request.
 mapReduce :: Job -> MapReduceRequest
 mapReduce (JSON bs)   = MapReduceRequest bs "application/json"
 mapReduce (Erlang bs) = MapReduceRequest bs "application/x-erlang-binary"

src/Network/Riak/Response.hs

 {-# LANGUAGE RecordWildCards #-}
 
+-- |
+-- Module:      Network.Riak.Request
+-- Copyright:   (c) 2011 MailRank, Inc.
+-- License:     Apache
+-- Maintainer:  Bryan O'Sullivan <bos@mailrank.com>
+-- Stability:   experimental
+-- Portability: portable
+--
+-- Smart deconstructors for Riak types.  These functions correctly
+-- URL-unescape bucket, key, and link names.  You should thus use them
+-- in preference to direct pattern matching against raw data
+-- constructors.
+
 module Network.Riak.Response
     (
     -- * Connection management
     -- * Metadata
     , listBuckets
     , getBucket
+    , unescapeLinks
     ) where
 
+import Control.Applicative ((<$>))
+import Data.Maybe (fromMaybe)
+import Network.Riak.Escape (unescape)
 import Network.Riak.Protocol.BucketProps
 import Network.Riak.Protocol.Content
 import Network.Riak.Protocol.GetBucketResponse
 import Network.Riak.Protocol.GetClientIDResponse
 import Network.Riak.Protocol.GetResponse
+import Network.Riak.Protocol.Link
 import Network.Riak.Protocol.ListBucketsResponse
 import Network.Riak.Protocol.PutResponse
 import Network.Riak.Types.Internal hiding (MessageTag(..))
 getClientID = client_id
 {-# INLINE getClientID #-}
 
+-- | Construct a get response.  Bucket and key names in links are
+-- URL-unescaped.
 get :: Maybe GetResponse -> Maybe (Seq.Seq Content, VClock)
 get (Just (GetResponse content (Just vclock)))
-      = Just (content, VClock vclock)
+      = Just (unescapeLinks <$> content, VClock vclock)
 get _ = Nothing
 {-# INLINE get #-}
 
+-- | Construct a put response.  Bucket and key names in links are
+-- URL-unescaped.
 put :: PutResponse -> (Seq.Seq Content, VClock)
-put PutResponse{..} = (content, VClock (maybe L.empty id vclock))
+put PutResponse{..} = (unescapeLinks <$> content,
+                       VClock (fromMaybe L.empty vclock))
 {-# INLINE put #-}
 
-listBuckets :: ListBucketsResponse -> (Seq.Seq Bucket)
-listBuckets = buckets
+-- | Construct a list-buckets response.  Bucket names are unescaped.
+listBuckets :: ListBucketsResponse -> Seq.Seq Bucket
+listBuckets = fmap unescape . buckets
 {-# INLINE listBuckets #-}
 
 getBucket :: GetBucketResponse -> BucketProps
 getBucket = props
 {-# INLINE getBucket #-}
+
+-- | URL-unescape the names of keys and buckets in the links of a
+-- 'Content' value.
+unescapeLinks :: Content -> Content
+unescapeLinks c = c { links = go <$> links c }
+  where go l = l { bucket = unescape <$> bucket l, key = unescape <$> key l }

src/Network/Riak/Value.hs

 import Network.Riak.Protocol.GetResponse (GetResponse(..))
 import Network.Riak.Protocol.PutResponse (PutResponse(..))
 import Network.Riak.Resolvable (ResolvableMonoid(..))
+import Network.Riak.Response (unescapeLinks)
 import Network.Riak.Types.Internal hiding (MessageTag(..))
 import qualified Data.Aeson.Parser as Aeson
 import qualified Data.Aeson.Types as Aeson
 
 convert :: IsContent v => Seq.Seq Content -> IO [v]
 convert = go [] [] . toList
-    where go cs vs (x:xs) = case fromContent x of
+    where go cs vs (x:xs) = case fromContent y of
                               Just v -> go cs (v:vs) xs
-                              _      -> go (x:cs) vs xs
+                              _      -> go (y:cs) vs xs
+              where y = unescapeLinks x
           go [] vs _      = return (reverse vs)
           go cs _  _      = typeError "Network.Riak.Value" "convert" $
                             show (length cs) ++ " values failed conversion: " ++
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.