1. Doug Burke
  2. swish

Commits

Doug Burke  committed 23971f5

Add Data.Interned.URI and use it in QName to try and optimize equality checks

  • Participants
  • Parent commits 9911191
  • Branches intern-branch

Comments (0)

Files changed (4)

File CHANGES

View file
  • Ignore whitespace
+
+An attempt to using some form of hash-based representation for
+URIs to speed up comparison
+
 0.5.0.3:
 
   - Missed a FlexibleInstances pragma for ghc 7.2.

File Data/Interned/URI.hs

View file
  • Ignore whitespace
+{-# LANGUAGE TypeFamilies, FlexibleInstances #-}
+
+--------------------------------------------------------------------------------
+--  See end of this file for licence information.
+--------------------------------------------------------------------------------
+-- |
+--  Module      :  URI
+--  Copyright   :  (c) 2011 Douglas Burke
+--  License     :  GPL V2
+--
+--  Maintainer  :  Douglas Burke
+--  Stability   :  experimental
+--  Portability :  TypeFamilies, FlexibleInstances
+--
+--  Support interning URIs.
+--
+--------------------------------------------------------------------------------
+
+module Data.Interned.URI
+       ( InternedURI
+       ) where
+
+import Data.String (IsString(..))
+import Data.Hashable
+import Data.Interned
+import Data.Maybe (fromMaybe)
+
+import Network.URI
+
+-- Could look at adding UNPACK statements before each component
+data InternedURI = InternedURI !Int !URI
+
+instance IsString InternedURI where
+  fromString = intern .
+               fromMaybe (error "Error: unable to create a URI.") .
+               parseURIReference
+              
+
+instance Eq InternedURI where
+  InternedURI a _ == InternedURI b _ = a == b
+
+instance Ord InternedURI where
+  compare (InternedURI a _) (InternedURI b _) = compare a b
+
+instance Show InternedURI where
+  showsPrec d (InternedURI _ b) = showsPrec d b
+
+instance Interned InternedURI where
+  type Uninterned InternedURI = URI
+  data Description InternedURI = DU !URI deriving (Eq) -- DU {-# UNPACK #-} !URI deriving (Eq) 
+  describe = DU
+  identify = InternedURI
+  identity (InternedURI i _) = i
+  cache = iuCache
+
+instance Uninternable InternedURI where
+  unintern (InternedURI _ b) = b 
+
+-- Rather than be clever, use the reverse of the string
+-- representation of the URI
+instance Hashable (Description InternedURI) where
+  hash = hashWithSalt 5381 -- use the stringSalt value from Data.Hashable
+  hashWithSalt salt (DU u) = hashWithSalt salt ((reverse . show) u)
+
+iuCache :: Cache InternedURI
+iuCache = mkCache
+{-# NOINLINE iuCache #-}
+
+--------------------------------------------------------------------------------
+--
+--  Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke
+--  All rights reserved.
+--
+--  This file is part of Swish.
+--
+--  Swish is free software; you can redistribute it and/or modify
+--  it under the terms of the GNU General Public License as published by
+--  the Free Software Foundation; either version 2 of the License, or
+--  (at your option) any later version.
+--
+--  Swish is distributed in the hope that it will be useful,
+--  but WITHOUT ANY WARRANTY; without even the implied warranty of
+--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+--  GNU General Public License for more details.
+--
+--  You should have received a copy of the GNU General Public License
+--  along with Swish; if not, write to:
+--    The Free Software Foundation, Inc.,
+--    59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+--
+--------------------------------------------------------------------------------

File Swish/Utils/QName.hs

View file
  • Ignore whitespace
 import Data.String (IsString(..))
 import Data.Maybe (fromMaybe)
 
+import Data.Interned (intern, unintern)
+import Data.Interned.URI (InternedURI)
+
 import qualified Data.Text as T
 
 ------------------------------------------------------------
 may not be a good idea (space vs time saving).
 -}
 
-data QName = QName !URI URI T.Text
+data QName = QName !InternedURI URI T.Text
 
 -- | This is not total since it will fail if the input string is not a valid URI.
 instance IsString QName where
   -}
   
   -- TODO: which is faster?
+  -- Now we have changed to InternedURI, we could use the
+  -- Ord instance of it, but it is unclear to me what the
+  -- ordering means in that case, and whether the semantics
+  -- matter here?
   (QName u1 _ _) <= (QName u2 _ _) = show u1 <= show u2
+  
   {-
   (QName _ uri1 l1) <= (QName _ uri2 l2) =
     if up1 /= up2 then up1 <= up2 else (ur1 ++ T.unpack l1) <= (ur2 ++ T.unpack l2)
       uri = fromMaybe (error ("Unable to combine " ++ show ns ++ " with " ++ l)) $ luri `relativeTo` ns
   -}
       
-  in QName uri ns local
+  in QName (intern uri) ns local
 
 {-
 
 qnameFromURI uri =
   let uf = uriFragment uri
       up = uriPath uri
-      q0 = QName uri uri ""
+      q0 = QName iuri uri ""
+      iuri = intern uri
   in case uf of
     "#"    -> q0
-    '#':xs -> QName uri (uri { uriFragment = "#" }) (T.pack xs)
+    '#':xs -> QName iuri (uri { uriFragment = "#" }) (T.pack xs)
     ""     -> case break (=='/') (reverse up) of
       ("",_) -> q0 -- path ends in / or is empty
       (_,"") -> q0 -- path contains no /
-      (rlname,rpath) -> QName uri (uri {uriPath = reverse rpath}) (T.pack (reverse rlname))
+      (rlname,rpath) -> QName iuri (uri {uriPath = reverse rpath}) (T.pack (reverse rlname))
       
     e -> error $ "Unexpected: uri=" ++ show uri ++ " has fragment='" ++ show e ++ "'" 
 
 -- | Returns the full URI of the QName (ie the combination of the
 -- namespace and local components).
 getQNameURI :: QName -> URI
-getQNameURI (QName u _ _) = u
+getQNameURI (QName u _ _) = unintern u
 
 {-
 Original used comparison of concatenated strings,

File swish.cabal

View file
  • Ignore whitespace
 Name:               swish
-Version:            0.5.0.3
+Version:            0.6.0.0
 Stability:          experimental
 License:            LGPL
 License-file:       LICENSE 
       mtl >= 1 && < 3,
       network >= 2.2 && < 2.4,
       directory >= 1.0 && < 1.2,
-      filepath >= 1.1 && < 1.3
+      filepath >= 1.1 && < 1.3,
+      hashable == 1.1.*,
+      intern == 0.8.*
 
    Exposed-Modules:
       Swish
       Swish.Utils.PartOrderedCollection
       Swish.Utils.QName
       Swish.Utils.ShowM
+      Data.Interned.URI
 
    other-modules:
       -- Paths_swish