Commits

Doug Burke committed 5829eae Merge

Merge of intern-branch: switch to hashing from Data.Hashable rather than Swish.Utils.MiscHelpers

Comments (0)

Files changed (9)

+0.6.0.1:
+
+  - use the hashing interface provided by Data.Hashable rather than
+    Swish.Utils.MiscHelpers.
+
 0.6.0.0:
 
   - use the intern package to create Data.Interned.URI and use

Data/Interned/URI.hs

 instance Uninternable InternedURI where
   unintern (InternedURI _ b) = b 
 
--- Rather than be clever, use the reverse of the string
--- representation of the URI
+-- Rather than access the URI components, just use the reverse of the
+-- string representation of the URI.
 instance Hashable (Description InternedURI) where
   hash = hashWithSalt 5381 -- use the stringSalt value from Data.Hashable
   hashWithSalt salt (DU u) = hashWithSalt salt ((reverse . show) u)

Swish/RDF/GraphClass.hs

 import qualified Data.Foldable as F
 import qualified Data.Traversable as T
 
+import Data.Hashable (Hashable(..))
+
 import Data.List (foldl', union, (\\))
 
 --  NOTE:  I wanted to declare this as a subclass of Functor, but
     
   -- | Calculate the hash of the label using the supplied seed.
   labelHash   :: Int -> lb -> Int     
+  
+  -- could provide a default of 
+  --   labelHash = hashWithSalt
+  -- but this would then force a Hashable constraint
     
   -- | Extract the local id from a variable node.                 
   getLocal    :: lb -> String
               }
             deriving (Eq, Functor, F.Foldable, T.Traversable)
 
+instance (Hashable lb) => Hashable (Arc lb) where
+  hash (Arc s p o) = hash s `hashWithSalt` p `hashWithSalt` o
+  hashWithSalt salt (Arc s p o) = salt `hashWithSalt` s `hashWithSalt` p `hashWithSalt` o
+
 -- | Return the subject of the arc.
 arcSubj :: Arc lb -> lb
 arcSubj = asubj

Swish/RDF/GraphMatch.hs

 
 import Data.Ord (comparing)
 import Data.List (foldl', nub, sortBy, partition)
+import Data.Hashable (combine)
 import qualified Data.List as L
 
 import Swish.RDF.GraphClass (Arc(..), Label(..),
                               makeLookupMap, listLookupMap, mapFind, mapReplaceAll,
                               mapAddIfNew, mapReplaceMap, mapMerge)
 import Swish.Utils.ListHelpers (select, equiv, pairSort, pairGroup, pairUngroup)
-import Swish.Utils.MiscHelpers (hash, hashModulus)
 
 --------------------------
 --  Label index value type
     makeLabel locnam = error $ "makeLabel for ScopedLabel: "++locnam
     labelIsVar (ScopedLabel _ lab)   = labelIsVar lab
     labelHash seed (ScopedLabel scope lab)
-        | labelIsVar lab    = hash seed $ show scope ++ "???"
+        | labelIsVar lab    = seed `combine` scope -- MH.hash seed $ show scope ++ "???"
         | otherwise         = labelHash seed lab
 
 instance (Label lb) => Eq (ScopedLabel lb) where
         assert (ev1==ev2) -- "GraphMatch2: Equivalence class value mismatch" $
         $ try glp
 
+-- this was in Swish.Utils.MiscHelpers along with a simple hash-based function
+-- based on Sedgewick, Algorithms in C, p233. As we have now moved to using
+-- Data.Hashable it is not clear whether this is still necessary or sensible.
+--
+hashModulus :: Int
+hashModulus = 16000001
+
 -- | Returns a string representation  of a LabelMap value
 --
 showLabelMap :: (Label lb) => LabelMap lb -> String
 
 hashVal :: (Label lb) => Int -> lb -> Int
 hashVal seed lab =
-    if labelIsVar lab then hash seed "???" else labelHash seed lab
+  if labelIsVar lab then seed `combine` 23 else labelHash seed lab
+  -- if labelIsVar lab then hash seed "???" else labelHash seed lab
 
 equivalenceClasses :: 
   (Label lb) 
         newIndex l
             | labelIsVar l  = mapAdjacent l     -- adjacency classifies variable labels
             | otherwise     = hashVal gen l     -- otherwise rehash (to disentangle collisions)
-        mapAdjacent l       = sum (sigsOver l) `rem` hashModulus
+        -- mapAdjacent l       = sum (sigsOver l) `rem` hashModulus
+        mapAdjacent l       = sum (sigsOver l) `combine` hashModulus -- is this a sensible replacement for `rem` MH.hashModulus        
         sigsOver l          = select (hasLabel l) gs (arcSignatures lmap gs)
 
 -- | Return list of distinct labels used in a graph
         sigCalc (s,p,o)  =
             ( labelVal2 s +
               labelVal2 p * 3 +
-              labelVal2 o * 5 ) `rem` hashModulus
+              labelVal2 o * 5 )
+            `combine` hashModulus
+            -- `rem` hashModulus
+          
         labelVal         = mapLabelIndex lmap
         labelVal2        = uncurry (*) . labelVal
 

Swish/RDF/GraphMem.hs

 
 import Swish.RDF.GraphClass
 import Swish.RDF.GraphMatch
-import Swish.Utils.MiscHelpers
-    ( hash )
+-- import Swish.Utils.MiscHelpers (hash)
 
+import Data.Hashable (Hashable(..), combine)
 import Data.Ord (comparing)
 
 import qualified Data.Foldable as F
     = LF String
     | LV String
 
+instance Hashable LabelMem where
+  hash (LF l) = 1 `hashWithSalt` l
+  hash (LV l) = 2 `hashWithSalt` l
+  hashWithSalt salt (LF l) = salt `combine` 1 `hashWithSalt` l
+  hashWithSalt salt (LV l) = salt `combine` 2 `hashWithSalt` l
+
 instance Label LabelMem where
     labelIsVar (LV _)   = True
     labelIsVar _        = False
     getLocal   (LV loc) = loc
     getLocal   lab      = error "getLocal of non-variable label: " ++ show lab
     makeLabel           = LV 
-    labelHash  seed lb  = hash seed (show lb)
+    -- labelHash  seed lb  = hash seed (show lb)
+    labelHash = hashWithSalt
 
 instance Eq LabelMem where
     (LF l1) == (LF l2)  = l1 == l2

Swish/Utils/MiscHelpers.hs

 --  This module defines a random set of helper functions
 --  used by the graph handling code.
 --
+--  This module is *deprecated* and will be removed at the next possible
+--  release.
+--
 --------------------------------------------------------------------------------
 
 module Swish.Utils.MiscHelpers
-      ( hash -- RDFGraph, GraphMem, GraphMatch
-      , hashModulus -- GraphMatch
-      
+      ( hash
+      , hashModulus
       )
 where
 

Swish/Utils/QName.hs

 instance IsString QName where
   fromString = qnameFromString
                
+-- | Equality is determined by a case sensitive comparison of the               
+-- URI.
 instance Eq QName where
   -- see qnEq comments below
   (QName u1 _ _) == (QName u2 _ _) = u1 == u2
 
--- ugly, use show instance
+-- ugly, use show instance OR switch to the ordering of InternedURI
     
 instance Ord QName where
   {-
         (up2,ur2) = splitAt n u2
   -}
   
--- The format of show QName may well change to remove the <>
+-- | The format used to display the URI is @<uri>@.
 instance Show QName where
     show (QName u _ _) = "<" ++ show u ++ ">"
 
   .
   Changes:
   .
+  [Version 0.6.0.1] Moved to using hashing routine using the Data.Hashable
+  interface rather than Swish.Utils.MiscHelpers which is deprecated.
+  .
   [Version 0.6.0.0] Add Data.Interned.URI and use it to speed up the QName
   equality check.
   .

tests/GraphTest.hs

 --
 --------------------------------------------------------------------------------
 
+{- 
+
+Note: after changing the hash module the order and values of some
+of the tests; I have not checked that the new ordering makes sense.
+
+-}
+
 module Main where
 
 import qualified Data.Foldable as F
 import Data.Ord (comparing)
 
 import Swish.Utils.ListHelpers
-import Swish.Utils.MiscHelpers
 import Swish.RDF.GraphClass (Arc(..), LDGraph(..),
                              Label(..), arc,
                              arcFromTriple,arcToTriple)
     , testSubset "08" False [1,2,3]       []
     ]
 
--- hash
-
-testHash :: String -> Bool -> Int -> Int -> Test
-testHash lab eq h1 h2 = testeq ("Hash"++lab ) eq (h1 == h2)
-
-testHashEq :: String -> Int -> Int -> Test
-testHashEq lab = testeq ("Hash"++lab ) 
-
-testHashSuite :: Test
-testHashSuite = TestList
-    [ testHash "01" True  (hash 0 base1) (hash 0 base1)
-    , testHash "02" True  (hash 2 "")    (hash 2 "")
-    , testHash "03" False (hash 3 base1) (hash 3 base2)
-    , testHash "04" False (hash 4 base1) (hash 5 base1)
-    , testHash "05" False (hash 2 "")    (hash 3 "")
-    , testHashEq "06"     1424775        (hash 3 base1)
-    , testHashEq "07"     11801303       (hash 3 base2)
-    ]
-
 ------------------------------------------------------------
 --  Simple graph label tests
 ------------------------------------------------------------
     , testeq "Lab03" False (labelIsVar lab2f)
     , testeq "Lab04" True  (labelIsVar lab2v)
 
-    , testeq "Lab05"  3436883 (labelHash 1 lab1f)
-    , testeq "Lab06" 10955600 (labelHash 1 lab1v)
-    , testeq "Lab07"  3436884 (labelHash 1 lab2f)
-    , testeq "Lab08" 10955601 (labelHash 1 lab2v)
+    , testeq "Lab05" 39495998 (labelHash 1 lab1f)
+    , testeq "Lab06" 45349309 (labelHash 1 lab1v)
+    , testeq "Lab07" 39495997 (labelHash 1 lab2f)
+    , testeq "Lab08" 45349310 (labelHash 1 lab2v)
+    
 
     , testeq "Lab09" "!lab1" (show lab1f)
     , testeq "Lab10" "?lab1" (show lab1v)
 -- assignLabels :: (Label lb) => [lb] -> LabelMap lb -> LabelMap lb
 
 lmap5 :: LabelMap LabelMem
+{-
 lmap5 = tstLabelMap 2 [(s1,(1,142577)),(s2,(1,142578)),(s3,(1,142579)),
                        (p1,(1,142385)),(p2,(1,142386)),(p3,(1,142387)),
                        (o1,(1,142321)),(o2,(1,142322)),(o3,(1,142323)),
                        (l1,(1,142129)),(l4,(1,1709580)),(l10,(1,3766582)),
                        (b3,(1,262143)),(b4,(1,262143))]
-        
+-}
+
+bhash :: Int
+bhash = 23
+
+l1hash, l4hash, l10hash :: Int
+l1hash = 2524
+l4hash = -1302210307
+l10hash = 10836024  
+
+l1hash2, l4hash2, l10hash2 :: Int
+l1hash2 = 2524
+l4hash2 = -1302210307
+l10hash2 = 10836024  
+
+o1hash, o2hash, o3hash :: Int
+o1hash = 2623
+o2hash = 2620
+o3hash = 2621
+
+p1hash, p2hash, p3hash :: Int
+p1hash = 2624
+p2hash = 2627
+p3hash = 2626
+
+s1hash, s2hash, s3hash :: Int
+s1hash = 2723
+s2hash = 2720
+s3hash = 2721
+
+lmap5 = tstLabelMap 2 
+        [
+          (b4,(1,bhash)),
+          (b3,(1,bhash)),
+          (l10,(1,l10hash)),
+          (l4,(1,l4hash)),
+          (l1,(1,l1hash)),
+          (o3,(1,o3hash)),
+          (o2,(1,o2hash)),
+          (o1,(1,o1hash)),
+          (p3,(1,p3hash)),
+          (p2,(1,p2hash)),
+          (p1,(1,p1hash)),
+          (s3,(1,s3hash)),
+          (s2,(1,s2hash)),
+          (s1,(1,s1hash))
+        ]
+
 testAssignLabelMap05 :: Test        
 testAssignLabelMap05 = testeq "AssignLabels05" lmap5
                         (newGenerationMap $ assignLabelMap ls5 emptyMap)
 
 lmap6 :: LabelMap LabelMem
-lmap6 = tstLabelMap 2 [(s1,(1,142577)),(s2,(1,142578)),(s3,(1,142579)),
-                       (p1,(1,142385)),(p2,(1,142386)),(p3,(1,142387)),
-                       (o1,(1,142321)),(o2,(1,142322)),(o3,(1,142323)),
-                       (l1,(1,142129)),(l4,(1,1709580)),(l10,(1,3766582)),
-                       (b1,(2,262143)),(b2,(2,262143)),(b3,(1,262143)),(b4,(1,262143))]
-
+lmap6 = tstLabelMap 2
+        [
+          (b2,(2,bhash)),
+          (b1,(2,bhash)),
+          (b4,(1,bhash)),
+          (b3,(1,bhash)),
+          (l10,(1,l10hash)),
+          (l4,(1,l4hash)),
+          (l1,(1,l1hash)),
+          (o3,(1,o3hash)),
+          (o2,(1,o2hash)),
+          (o1,(1,o1hash)),
+          (p3,(1,p3hash)),
+          (p2,(1,p2hash)),
+          (p1,(1,p1hash)),
+          (s3,(1,s3hash)),
+          (s2,(1,s2hash)),
+          (s1,(1,s1hash))
+        ]
+        
 testAssignLabelMap06 :: Test
 testAssignLabelMap06 = testeq "AssignLabels06" lmap6 (assignLabelMap ls6 lmap5)
 
               assignLabelMap (graphLabels as61) emptyMap
 
 eq1ltst :: LabelMap (ScopedLabel LabelMem)
-eq1ltst     = tstLabelMap 2 [
-                             (s1_1,(1,142577)),(s2_1,(1,142578)),(s3_1,(1,142579)),
-                             (p1_1,(1,142385)),(p2_1,(1,142386)),(p3_1,(1,142387)),
-                             (o1_1,(1,142321)),(o2_1,(1,142322)),(o3_1,(1,142323)),
-                             (l1_1,(1,142129)),(l4_1,(1,1709580)),(l10_1,(1,3766582)),
-                             (b1_1,(1,262143)),(b2_1,(1,262143)),(b3_1,(1,262143)),(b4_1,(1,262143)),
-                             (s1_2,(1,142577)),(s2_2,(1,142578)),(s3_2,(1,142579)),
-                             (p1_2,(1,142385)),(p2_2,(1,142386)),(p3_2,(1,142387)),
-                             (o1_2,(1,142321)),(o2_2,(1,142322)),(o3_2,(1,142323)),
-                             (l1_2,(1,142129)),(l4_2,(1,1709580)),(l10_2,(1,3766582)),
-                             (b1_2,(1,262143)),(b2_2,(1,262143)),(b3_2,(1,262143)),(b4_2,(1,262143))
-                            ]
-              
+eq1ltst = tstLabelMap 2 
+          [
+            (b4_2,(1,bhash)),
+            (b3_2,(1,bhash)),
+            (p3_2,(1,p3hash)),
+            (b2_2,(1,bhash)),
+            (p2_2,(1,p2hash)),
+            (b1_2,(1,bhash)),
+            (l10_2,(1,l10hash)),
+            (l4_2,(1,l4hash)),
+            (l1_2,(1,l1hash)),
+            (o3_2,(1,o3hash)),
+            (s3_2,(1,s3hash)),
+            (o2_2,(1,o2hash)),
+            (s2_2,(1,s2hash)),
+            (o1_2,(1,o1hash)),
+            (p1_2,(1,p1hash)),
+            (s1_2,(1,s1hash)),
+            (b4_1,(1,bhash)),
+            (b3_1,(1,bhash)),
+            (p3_1,(1,p3hash)),
+            (b2_1,(1,bhash)),
+            (p2_1,(1,p2hash)),
+            (b1_1,(1,bhash)),
+            (l10_1,(1,l10hash)),
+            (l4_1,(1,l4hash)),
+            (l1_1,(1,l1hash)),
+            (o3_1,(1,o3hash)),
+            (s3_1,(1,s3hash)),
+            (o2_1,(1,o2hash)),
+            (s2_1,(1,s2hash)),
+            (o1_1,(1,o1hash)),
+            (p1_1,(1,p1hash)),
+            (s1_1,(1,s1hash))
+            ]
+          
 testEqAssignMap01 :: Test              
 testEqAssignMap01 = testeq "EqAssignMap01" eq1ltst eq1lmap
 
               assignLabelMap (graphLabels as41) emptyMap
               
 eq2ltst :: LabelMap (ScopedLabel LabelMem)
-eq2ltst     = tstLabelMap 2 [(s1_1,(1,142577)),(s2_1,(1,142578)),(s3_1,(1,142579)),
-                             (p1_1,(1,142385)),(p2_1,(1,142386)),(p3_1,(1,142387)),
-                             (o1_1,(1,142321)),(o2_1,(1,142322)),(o3_1,(1,142323)),
-                             (l1_1,(1,142129)),(l4_1,(1,1709580)),(l10_1,(1,3766582)),
-                             (b1_1,(1,262143)),(b2_1,(1,262143)),
-                             (s1_2,(1,142577)),(s2_2,(1,142578)),(s3_2,(1,142579)),
-                             (p1_2,(1,142385)),(p2_2,(1,142386)),(p3_2,(1,142387)),
-                             (o1_2,(1,142321)),(o2_2,(1,142322)),(o3_2,(1,142323)),
-                             (l1_2,(1,142129)),(l4_2,(1,1709580)),(l10_2,(1,3766582)),
-                             (b1_2,(1,262143)),(b2_2,(1,262143))]
-              
+eq2ltst = tstLabelMap 2
+          [
+            (p3_2,(1,p3hash)),
+            (b2_2,(1,bhash)),
+            (b1_2,(1,bhash)),
+            (p2_2,(1,p2hash)),
+            (l10_2,(1,l10hash)),
+            (l4_2,(1,l4hash)),
+            (l1_2,(1,l1hash)),
+            (o3_2,(1,o3hash)),
+            (s3_2,(1,s3hash)),
+            (o2_2,(1,o2hash)),
+            (s2_2,(1,s2hash)),
+            (o1_2,(1,o1hash)),
+            (p1_2,(1,p1hash)),
+            (s1_2,(1,s1hash)),
+            (p3_1,(1,p3hash)),
+            (b2_1,(1,bhash)),
+            (p2_1,(1,p2hash)),
+            (b1_1,(1,bhash)),
+            (l10_1,(1,l10hash)),
+            (l4_1,(1,l4hash)),
+            (l1_1,(1,l1hash)),
+            (o3_1,(1,o3hash)),
+            (s3_1,(1,s3hash)),
+            (o2_1,(1,o2hash)),
+            (s2_1,(1,s2hash)),
+            (o1_1,(1,o1hash)),
+            (p1_1,(1,p1hash)),
+            (s1_1,(1,s1hash))
+          ]
+
 testEqAssignMap21 :: Test
 testEqAssignMap21 = testeq "EqAssignMap21" eq2ltst eq2lmap
 
               assignLabelMap (graphLabels as22) emptyMap
               
 eq3ltst :: LabelMap (ScopedLabel LabelMem)
-eq3ltst     = tstLabelMap 2
-    [ (s1_1,(1,142577))
-    , (p1_1,(1,142385))
-    , (o1_1,(1,142321))
-    , (s1_2,(1,142577)), (s2_2,(1,142578)), (s3_2,(1,142579))
-    , (p1_2,(1,142385))
-    , (o1_2,(1,142321)), (o2_2,(1,142322)), (o3_2,(1,142323))
-    , (l1_2,(1,142129)), (l4_2,(1,1709580)), (l10_2,(1,3766582))
+eq3ltst = tstLabelMap 2
+    [ 
+      (o1_1,(1,o1hash))
+    , (p1_1,(1,p1hash))
+    , (s1_1,(1,s1hash))
+    , (l10_2,(1,l10hash))
+    , (l4_2,(1,l4hash))
+    , (l1_2,(1,l1hash))
+    , (o3_2,(1,o3hash))
+    , (s3_2,(1,s3hash))
+    , (o2_2,(1,o2hash))
+    , (s2_2,(1,s2hash))
+    , (o1_2,(1,o1hash))
+    , (p1_2,(1,p1hash))
+    , (s1_2,(1,s1hash)) 
     ]
-    
+
 testEqAssignMap32 :: Test    
 testEqAssignMap32 = testeq "EqAssignMap32" eq3ltst eq3lmap
 
 
 ec31test :: [EquivArgs]
 ec31test =
-    [ ((1,142321),[o1_1])
-    , ((1,142385),[p1_1])
-    , ((1,142577),[s1_1])
+    [ ((1,2623),[o1_1])
+    , ((1,2624),[p1_1])
+    , ((1,2723),[s1_1])
     ]
 
 ec32 :: [EquivClass]
 
 ec32test :: [EquivArgs]
 ec32test =
-    [ ((1,142129),[l1_2])
-    , ((1,142321),[o1_2])
-    , ((1,142322),[o2_2])
-    , ((1,142323),[o3_2])
-    , ((1,142385),[p1_2])
-    , ((1,142577),[s1_2])
-    , ((1,142578),[s2_2])
-    , ((1,142579),[s3_2])
-    , ((1,1709580),[l4_2])
-    , ((1,3766582),[l10_2])
+    [ 
+      ((1,-1302210307),[l4_2])
+    , ((1,2524),[l1_2])
+    , ((1,2620),[o2_2])
+    , ((1,2621),[o3_2])
+    , ((1,2623),[o1_2])
+    , ((1,2624),[p1_2])
+    , ((1,2720),[s2_2])
+    , ((1,2721),[s3_2])
+    , ((1,2723),[s1_2])
+    , ((1,10836024),[l10_2])
     ]
-
+  
 testEquivClass33_1, testEquivClass33_2 :: Test
 testEquivClass33_1 = testeq "EquivClass33_1" ec31test ec31
 testEquivClass33_2 = testeq "EquivClass33_2" ec32test ec32
 ec3pairs :: [(EquivClass, EquivClass)]
 ec3pairs = zip (pairSort ec31) (pairSort ec32)
 
+{-  This is a pointless test in this case
+
 ec3test :: [(EquivClass, EquivClass)]
 ec3test  =
     [ ( ((1,142321),[o1_1]), ((1,142321),[o1_2]) )
     , ( ((1,142577),[s1_1]), ((1,142577),[s1_2]) )
     ]
 
-{-  This is a pointless test in this case
 testEquivClass33_3 = testeq "EquivClass33_3" ec3test ec3pairs
 -}
 
 allTests = TestList
   [ testSelectSuite
   , testSubsetSuite
-  , testHashSuite
   , testLabSuite
   , testGraphSuite
   , testLabelEqSuite
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.