Source

swish / src / Swish / GraphMatch.hs

Full commit
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}

--------------------------------------------------------------------------------
--  See end of this file for licence information.
--------------------------------------------------------------------------------
-- |
--  Module      :  GraphMatch
--  Copyright   :  (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011, 2012 Douglas Burke
--  License     :  GPL V2
--
--  Maintainer  :  Douglas Burke
--  Stability   :  experimental
--  Portability :  CPP, FlexibleInstances, MultiParamTypeClasses
--
--  This module contains graph-matching logic.
--
--  The algorithm used is derived from a paper on RDF graph matching
--  by Jeremy Carroll <http://www.hpl.hp.com/techreports/2001/HPL-2001-293.html>.
--
--------------------------------------------------------------------------------

module Swish.GraphMatch
      ( graphMatch,
        -- * Exported for testing
        LabelMap, GenLabelMap(..), LabelEntry, GenLabelEntry(..),
        ScopedLabel(..), makeScopedLabel, makeScopedArc,
        LabelIndex, EquivalenceClass, nullLabelVal, emptyMap,
        labelIsVar, labelHash,
        mapLabelIndex, setLabelHash, newLabelMap,
        graphLabels, assignLabelMap, newGenerationMap,
        graphMatch1, graphMatch2, equivalenceClasses, reclassify
      ) where

import Swish.GraphClass (Arc(..), ArcSet, Label(..))
import Swish.GraphClass (getComponents, arcLabels, hasLabel, arcToTriple)

import Control.Exception.Base (assert)
import Control.Arrow (second)

import Data.Function (on)
import Data.Hashable (combine)
import Data.List (foldl', sortBy, groupBy, partition)
import Data.Ord (comparing)
import Data.Word

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S

--------------------------
--  Label index value type
--------------------------
--

-- | LabelIndex is a unique value assigned to each label, such that
--  labels with different values are definitely different values
--  in the graph;  e.g. do not map to each other in the graph
--  bijection.  The first member is a generation counter that
--  ensures new values are distinct from earlier passes.

type LabelIndex = (Word32, Word32)

-- | The null, or empty, index value.
nullLabelVal :: LabelIndex
nullLabelVal = (0, 0)

-----------------------
--  Label mapping types
-----------------------

-- | A Mapping between a label and a value (e.g. an index
-- value).
data (Label lb) => GenLabelEntry lb lv = LabelEntry lb lv

-- | A label associated with a 'LabelIndex'
type LabelEntry lb = GenLabelEntry lb LabelIndex

instance (Label lb, Show lv) => Show (GenLabelEntry lb lv) where
    show (LabelEntry k v) = show k ++ ":" ++ show v

instance (Label lb, Eq lv) => Eq (GenLabelEntry lb lv) where
    (LabelEntry k1 v1) == (LabelEntry k2 v2) = (k1,v1) == (k2,v2)

instance (Label lb, Show lv, Ord lv) => Ord (GenLabelEntry lb lv) where
    (LabelEntry lb1 lv1) `compare` (LabelEntry lb2 lv2) =
        (lb1, lv1) `compare` (lb2, lv2)

-- | Type for label->index lookup table
data (Label lb, Eq lv, Show lv) => GenLabelMap lb lv =
    LabelMap Word32 (M.Map lb lv)

-- | A label lookup table specialized to 'LabelIndex' indices.
type LabelMap lb = GenLabelMap lb LabelIndex

instance (Label lb) => Show (LabelMap lb) where
    show = showLabelMap

instance (Label lb) => Eq (LabelMap lb) where
    LabelMap gen1 lmap1 == LabelMap gen2 lmap2 =
      (gen1, lmap1) == (gen2, lmap2)

-- | The empty label map table.
emptyMap :: (Label lb) => LabelMap lb
emptyMap = LabelMap 1 M.empty

--------------------------
--  Equivalence class type
--------------------------
--

-- | Type for equivalence class description
--  (An equivalence class is a collection of labels with
--  the same 'LabelIndex' value.)

type EquivalenceClass lb = (LabelIndex, [lb])

{-
ecIndex :: EquivalenceClass lb -> LabelIndex
ecIndex = fst
-}

ecLabels :: EquivalenceClass lb -> [lb]
ecLabels = snd

{-
ecSize :: EquivalenceClass lb -> Int
ecSize = length . ecLabels
-}

ecRemoveLabel :: (Label lb) => EquivalenceClass lb -> lb -> EquivalenceClass lb
ecRemoveLabel xs l = second (L.delete l) xs

------------------------------------------------------------
--  Filter, ungroup, sort and group pairs by first member
------------------------------------------------------------

{-
pairSelect :: ((a,b) -> Bool) -> ((a,b) -> c) -> [(a,b)] -> [c]
pairSelect p f as = map f (filter p as)
-}

-- | Ungroup the pairs.
pairUngroup :: 
    (a,[b])    -- ^ Given (a,bs)
    -> [(a,b)] -- ^ Returns (a,b) for all b in bs
pairUngroup (a,bs) = [ (a,b) | b <- bs ]

-- | Order the pairs based on the first argument.
pairSort :: (Ord a) => [(a,b)] -> [(a,b)]
pairSort = sortBy (comparing fst)

-- TODO: use set on input

-- | Group the pairs based on the first argument.
pairGroup :: (Ord a) => [(a,b)] -> [(a,[b])]
pairGroup = map (factor . unzip) . groupBy eqFirst . pairSort 
    where
      -- as is not [] by construction, but would be nice to have
      -- this enforced by the types
      factor (as, bs) = (head as, bs)
      eqFirst = (==) `on` fst

------------------------------------------------------------
--  Augmented graph label value - for graph matching
------------------------------------------------------------
--
-- | This instance of class label adds a graph identifier to
--  each variable label, so that variable labels from
--  different graphs are always seen as distinct values.
--
--  The essential logic added by this class instance is embodied
--  in the eq and hash functions.  Note that variable label hashes
--  depend only on the graph in which they appear, and non-variable
--  label hashes depend only on the variable.  Label hash values are
--  used when initializing a label equivalence-class map (and, for
--  non-variable labels, also for resolving hash collisions).

data (Label lb) => ScopedLabel lb = ScopedLabel Int lb

-- | Create a scoped label given an identifier and label.
makeScopedLabel :: (Label lb) => Int -> lb -> ScopedLabel lb
makeScopedLabel = ScopedLabel 

-- | Create an arc containining a scoped label with the given identifier.
makeScopedArc :: (Label lb) => Int -> Arc lb -> Arc (ScopedLabel lb)
makeScopedArc scope = fmap (ScopedLabel scope)

instance (Label lb) => Label (ScopedLabel lb) where
    getLocal  lab    = error $ "getLocal for ScopedLabel: "++show lab
    makeLabel locnam = error $ "makeLabel for ScopedLabel: "++locnam
    labelIsVar (ScopedLabel _ lab)   = labelIsVar lab
    labelHash seed (ScopedLabel scope lab)
        | labelIsVar lab    = seed `combine` scope -- MH.hash seed $ show scope ++ "???"
        | otherwise         = labelHash seed lab

instance (Label lb) => Eq (ScopedLabel lb) where
    (ScopedLabel s1 l1) == (ScopedLabel s2 l2)
        = l1 == l2 && s1 == s2

instance (Label lb) => Show (ScopedLabel lb) where
    show (ScopedLabel s1 l1) = show s1 ++ ":" ++ show l1

instance (Label lb) => Ord (ScopedLabel lb) where
    compare (ScopedLabel s1 l1) (ScopedLabel s2 l2) =
        case compare s1 s2 of
            LT -> LT
            EQ -> compare l1 l2
            GT -> GT

-- QUS: why doesn't this return Maybe (LabelMap (ScopedLabel lb)) ?

-- TODO: Should this use Set (Arc lb) instead of [Arc lb]?

-- | Graph matching function accepting two lists of arcs and
--  returning a node map if successful
--
graphMatch :: (Label lb) =>
    (lb -> lb -> Bool)
    -- ^ a function that tests for additional constraints
    --   that may prevent the matching of a supplied pair
    --   of nodes.  Returns `True` if the supplied nodes may be
    --   matched.  (Used in RDF graph matching for checking
    --   that formula assignments are compatible.)
    -> ArcSet lb -- ^ the first graph to be compared
    -> ArcSet lb -- ^ the second graph to be compared
    -> (Bool, LabelMap (ScopedLabel lb))
    -- ^ If the first element is `True` then the second element maps each label
    --   to an equivalence class identifier, otherwise it is just
    --   `emptyMap`.
    --
graphMatch matchable gs1 gs2 =
    let
        sgs1    = {- trace "sgs1 " $ -} S.map (makeScopedArc 1) gs1
        sgs2    = {- trace "sgs2 " $ -} S.map (makeScopedArc 2) gs2
        ls1     = {- traceShow "ls1 " $ -} graphLabels sgs1
        ls2     = {- traceShow "ls2 " $ -} graphLabels sgs2
        lmap    = {- traceShow "lmap " $ -}
                  newGenerationMap $
                  assignLabelMap ls1 $
                  assignLabelMap ls2 emptyMap
        ec1     = {- traceShow "ec1 " $ -} equivalenceClasses lmap ls1
        ec2     = {- traceShow "ec2 " $ -} equivalenceClasses lmap ls2
        ecpairs = zip (pairSort ec1) (pairSort ec2)
        matchableScoped (ScopedLabel _ l1) (ScopedLabel _ l2) = matchable l1 l2
        match   = graphMatch1 False matchableScoped sgs1 sgs2 lmap ecpairs
    in
        if length ec1 /= length ec2 then (False,emptyMap) else match

--  TODO:
--
--    * replace Equivalence class pair by @(index,[lb],[lb])@ ?
--
--    * possible optimization:  the @graphMapEq@ test should be
--      needed only if `graphMatch2` has been used to guess a
--      mapping;  either: 
--          a) supply flag saying guess has been used, or
--          b) move test to `graphMatch2` and use different
--             test to prevent rechecking for each guess used.
--

-- | Recursive graph matching function
--
--  This function assumes that no variable label appears in both graphs.
--  (Function `graphMatch`, which calls this, ensures that all variable
--  labels are distinct.)
--

graphMatch1 :: 
  (Label lb) 
  => Bool
  -- ^ `True` if a guess has been used before trying this comparison,
  --   `False` if nodes are being matched without any guesswork
  -> (lb -> lb -> Bool)
  -- ^ Test for additional constraints that may prevent the matching
  --  of a supplied pair of nodes.  Returns `True` if the supplied
  --  nodes may be matched.
  -> ArcSet lb 
  -- ^ (@gs1@ argument)
  --   first of two lists of arcs (triples) to be compared
  -> ArcSet lb
  -- ^ (@gs2@ argument)
  --   secind of two lists of arcs (triples) to be compared
  -> LabelMap lb
  -- ^ the map so far used to map label values to equivalence class
  --   values
  -> [(EquivalenceClass lb,EquivalenceClass lb)]
  -- ^ (the @ecpairs@ argument) list of pairs of corresponding
  --   equivalence classes of nodes from @gs1@ and @gs2@ that have not
  --   been confirmed in 1:1 correspondence with each other.  Each
  --   pair of equivalence classes contains nodes that must be placed
  --   in 1:1 correspondence with each other.
  --
  -> (Bool,LabelMap lb)
  -- ^ the pair @(match, map)@ where @match@ is @True@ if the supplied
  --   sets of arcs can be matched, in which case @map@ is a
  --   corresponding map from labels to equivalence class identifiers.
  --   When @match@ is @False@, @map@ is the most detailed equivalence
  --   class map obtained before a mismatch was detected or a guess
  --   was required -- this is intended to help identify where the
  --   graph mismatch may be.
graphMatch1 guessed matchable gs1 gs2 lmap ecpairs =
    let
        (secs,mecs) = partition uniqueEc ecpairs
        uniqueEc ( (_,[_])  , (_,[_])  ) = True
        uniqueEc (  _       ,  _       ) = False
        
        doMatch  ( (_,[l1]) , (_,[l2]) ) = labelMatch matchable lmap l1 l2
        doMatch  x = error $ "doMatch failue: " ++ show x -- keep -Wall happy

        ecEqSize ( (_,ls1)  , (_,ls2)  ) = length ls1 == length ls2
        eSize    ( (_,ls1)  , _        ) = length ls1
        ecCompareSize = comparing eSize
        (lmap',mecs',newEc,matchEc) = reclassify gs1 gs2 lmap mecs
        match2 = graphMatch2 matchable gs1 gs2 lmap $ sortBy ecCompareSize mecs
    in
        -- trace ("graphMatch1\nsingle ECs:\n"++show secs++
        --                   "\nmultiple ECs:\n"++show mecs++
        --                   "\n\n") $
        --  if mismatch in singleton equivalence classes, fail
        if not $ all doMatch secs then (False,lmap)
        else
        --  if no multi-member equivalence classes,
        --  check and return label map supplied
        -- trace ("graphMatch1\ngraphMapEq: "++show (graphMapEq lmap gs1 gs2)) $
        if null mecs then (graphMapEq lmap gs1 gs2,lmap)
        else
        --  if size mismatch in equivalence classes, fail
        -- trace ("graphMatch1\nall ecEqSize mecs: "++show (all ecEqSize mecs)) $
        
          --  invoke reclassification, and deal with result
          if not (all ecEqSize mecs) || not matchEc
            then (False, lmap)
            else if newEc
                   then graphMatch1 guessed matchable gs1 gs2 lmap' mecs'
                        --  if guess does not result in a match, return supplied label map
                   else if fst match2 then match2 else (False, lmap)

{-
          if not $ all ecEqSize mecs then (False,lmap)
        else
        if not matchEc then (False,lmap)
        else
        if newEc then graphMatch1 guessed matchable gs1 gs2 lmap' mecs'
        else
        if fst match2 then match2 else (False,lmap)
-}

-- | Auxiliary graph matching function
--
--  This function is called when deterministic decomposition of node
--  mapping equivalence classes has run its course.
--
--  It picks a pair of equivalence classes in ecpairs, and arbitrarily matches
--  pairs of nodes in those equivalence classes, recursively calling the
--  graph matching function until a suitable node mapping is discovered
--  (success), or until all such pairs have been tried (failure).
--
--  This function represents a point to which arbitrary choices are backtracked.
--  The list comprehension 'glp' represents the alternative choices at the
--  point of backtracking
--
--  The selected pair of nodes are placed in a new equivalence class based on their
--  original equivalence class value, but with a new NodeVal generation number.

graphMatch2 :: (Label lb) => (lb -> lb -> Bool)
    -> ArcSet lb
    -> ArcSet lb
    -> LabelMap lb 
    -> [(EquivalenceClass lb,EquivalenceClass lb)]
    -> (Bool,LabelMap lb)
graphMatch2 _         _   _   _    [] = error "graphMatch2 sent an empty list" -- To keep -Wall happy
graphMatch2 matchable gs1 gs2 lmap ((ec1@(ev1,ls1),ec2@(ev2,ls2)):ecpairs) =
    let
        v1 = snd ev1
        --  Return any equivalence-mapping obtained by matching a pair
        --  of labels in the supplied list, or Nothing.
        try []            = (False,lmap)
        try ((l1,l2):lps) = if isEquiv try1 l1 l2 then try1 else try lps
            where
                try1     = graphMatch1 True matchable gs1 gs2 lmap' ecpairs'
                lmap'    = newLabelMap lmap [(l1,v1),(l2,v1)]
                ecpairs' = ((ev',[l1]),(ev',[l2])):ec':ecpairs
                ev'      = mapLabelIndex lmap' l1
                ec'      = (ecRemoveLabel ec1 l1, ecRemoveLabel ec2 l2)
                -- [[[TODO: replace this: if isJust try ?]]]
                isEquiv (False,_)   _  _  = False
                isEquiv (True,lm) x1 x2 =
                    mapLabelIndex m1 x1 == mapLabelIndex m2 x2
                    where
                        m1 = remapLabels gs1 lm [x1]
                        m2 = remapLabels gs2 lm [x2]
        --  glp is a list of label-pair candidates for matching,
        --  selected from the first label-equivalence class.
        --  NOTE:  final test is call of external matchable function
        glp = [ (l1,l2) | l1 <- ls1 , l2 <- ls2 , matchable l1 l2 ]
    in
        assert (ev1==ev2) -- "GraphMatch2: Equivalence class value mismatch" $
        $ try glp

-- this was in Swish.Utils.MiscHelpers along with a simple hash-based function
-- based on Sedgewick, Algorithms in C, p233. As we have now moved to using
-- Data.Hashable it is not clear whether this is still necessary or sensible.
--
hashModulus :: Int
hashModulus = 16000001

-- | Returns a string representation  of a LabelMap value
--
showLabelMap :: (Label lb) => LabelMap lb -> String
showLabelMap (LabelMap gn lmap) =
    "LabelMap gen="++ Prelude.show gn ++", map="++
    foldl' (++) "" (map (("\n    "++) . Prelude.show) es)
    where
        es = M.toList lmap

-- | Map a label to its corresponding label index value in the
--   supplied LabelMap.
--
mapLabelIndex :: (Label lb) => LabelMap lb -> lb -> LabelIndex
mapLabelIndex (LabelMap _ lxms) lb = M.findWithDefault nullLabelVal lb lxms

-- | Confirm that a given pair of labels are matchable, and are
--  mapped to the same value by the supplied label map
--
labelMatch :: (Label lb)
    =>  (lb -> lb -> Bool) -> LabelMap lb -> lb -> lb -> Bool
labelMatch matchable lmap l1 l2 =
    matchable l1 l2 && (mapLabelIndex lmap l1 == mapLabelIndex lmap l2)

-- | Replace selected values in a label map with new values from the supplied
--  list of labels and new label index values.  The generation number is
--  supplied from the current label map.  The generation number in the
--  resulting label map is incremented.
--
newLabelMap :: (Label lb) => LabelMap lb -> [(lb, Word32)] -> LabelMap lb
newLabelMap lmap []       = newGenerationMap lmap
newLabelMap lmap (lv:lvs) = setLabelHash (newLabelMap lmap lvs) lv

-- | Replace a label and its associated value in a label map
--  with a new value using the supplied hash value and the current
--  `LabelMap` generation number.  If the key is not found, then no change
--  is made to the label map.

setLabelHash :: (Label lb)
    => LabelMap lb -> (lb, Word32) -> LabelMap lb
setLabelHash  (LabelMap g lmap) (lb,lh) =
    LabelMap g $ M.insert lb (g,lh) lmap

-- | Increment the generation of the label map.
--
--  Returns a new label map identical to the supplied value
--  but with an incremented generation number.
--
newGenerationMap :: (Label lb) => LabelMap lb -> LabelMap lb
newGenerationMap (LabelMap g lvs) = LabelMap (g+1) lvs

-- | Scan label list, assigning initial label map values,
--  adding new values to the label map supplied.
--
--  Label map values are assigned on the basis of the
--  label alone, without regard for it's connectivity in
--  the graph.  (cf. `reclassify`).
--
--  All variable node labels are assigned the same initial
--  value, as they may be matched with each other.
--
assignLabelMap :: (Label lb) => S.Set lb -> LabelMap lb -> LabelMap lb
assignLabelMap ns lmap = S.foldl' (flip assignLabelMap1) lmap ns

assignLabelMap1 :: (Label lb) => lb -> LabelMap lb -> LabelMap lb
assignLabelMap1 lab (LabelMap g lvs) = 
    LabelMap g $ M.insertWith (flip const) lab (g, initVal lab) lvs

--  Calculate initial value for a node

initVal :: (Label lb) => lb -> Word32
initVal = fromIntegral . hashVal 0

hashVal :: (Label lb) => Int -> lb -> Int
hashVal seed lab =
  if labelIsVar lab then seed `combine` 23 else labelHash seed lab
  -- if labelIsVar lab then hash seed "???" else labelHash seed lab

-- | Return the equivalence classes of the supplied nodes 
-- using the label map.
equivalenceClasses :: 
  (Label lb) 
  => LabelMap lb     -- ^ label map
  -> S.Set lb        -- ^ nodes to be reclassified
  -> [EquivalenceClass lb]
equivalenceClasses lmap ls =
    pairGroup $ S.toList $ S.map labelPair ls
    where
        labelPair l = (mapLabelIndex lmap l,l)

-- | Reclassify labels
--
--  Examines the supplied label equivalence classes (based on the supplied
--  label map), and evaluates new equivalence subclasses based on node
--  values and adjacency (for variable nodes) and rehashing
--  (for non-variable nodes).
--
--  Note, assumes that all all equivalence classes supplied are
--  non-singletons;  i.e. contain more than one label.
--
reclassify :: 
  (Label lb) 
  => ArcSet lb 
  -- ^ (the @gs1@ argument) the first of two sets of arcs to perform a
  --   basis for reclassifying the labels in the first equivalence
  --   class in each pair of @ecpairs@.
  -> ArcSet lb
  -- ^ (the @gs2@ argument) the second of two sets of arcs to perform a
  --   basis for reclassifying the labels in the second equivalence
  --   class in each pair of the @ecpairs@ argument
  -> LabelMap lb 
  -- ^ the label map used for classification of the labels in
  --   the supplied equivalence classes
  -> [(EquivalenceClass lb,EquivalenceClass lb)]
  -- ^ (the @ecpairs@ argument) a list of pairs of corresponding equivalence classes of
  --   nodes from @gs1@ and @gs2@ that have not been confirmed
  --   in 1:1 correspondence with each other.
  -> (LabelMap lb,[(EquivalenceClass lb,EquivalenceClass lb)],Bool,Bool)
  -- ^ The output tuple consists of:
  --
  --  1) a revised label map reflecting the reclassification
  --
  --  2) a new list of equivalence class pairs based on the
  --   new node map
  --
  --  3) if the reclassification partitions any of the
  --     supplied equivalence classes then `True`, else `False`
  --
  --  4) if reclassification results in each equivalence class
  --     being split same-sized equivalence classes in the two graphs,
  --     then `True`, otherwise `False`.

reclassify gs1 gs2 lmap@(LabelMap _ lm) ecpairs =
    assert (gen1==gen2) -- "Label map generation mismatch"
      (LabelMap gen1 lm',ecpairs',newPart,matchPart)
    where
        LabelMap gen1 lm1 =
            remapLabels gs1 lmap $ foldl1 (++) $ map (ecLabels . fst) ecpairs
        LabelMap gen2 lm2 =
            remapLabels gs2 lmap $ foldl1 (++) $ map (ecLabels . snd) ecpairs

        lm' = classifyCombine lm $ M.union lm1 lm2
        
        tmap f (a,b) = (f a, f b)
        
        -- ecGroups :: [([EquivalenceClass lb],[EquivalenceClass lb])]
        ecGroups  = map (tmap remapEc) ecpairs
        ecpairs'  = concatMap (uncurry zip) ecGroups
        newPart   = any pairG1 lenGroups
        matchPart = all pairEq lenGroups
        lenGroups = map (tmap length) ecGroups
        pairEq = uncurry (==)
        pairG1 (p1,p2) = p1 > 1 || p2 > 1
        remapEc = pairGroup . map (newIndex lm') . pairUngroup 
        newIndex x (_,lab) = (M.findWithDefault nullLabelVal lab x,lab)

-- Replace the values in lm1 with those from lm2, but do not copy over new
-- keys from lm2
classifyCombine :: (Ord a) => M.Map a b -> M.Map a b -> M.Map a b
#if MIN_VERSION_containers(0,5,0)
classifyCombine = M.mergeWithKey (\_ _ v -> Just v) id (const M.empty)
#else
-- rely on the left-biased nature of union
classifyCombine lm1 lm2 = M.intersection lm2 lm1 `M.union` lm1
#endif

-- | Calculate a new index value for a supplied set of labels based on the
--  supplied label map and adjacency calculations in the supplied graph
--
remapLabels :: 
  (Label lb) 
  => ArcSet lb -- ^ arcs used for adjacency calculations when remapping
  -> LabelMap lb -- ^ the current label index values
  -> [lb] -- ^ the graph labels for which new mappings are to be created
  -> LabelMap lb
  -- ^ the updated label map containing recalculated label index values
  -- for the given graph labels. The label map generation number is
  -- incremented by 1.
remapLabels gs lmap@(LabelMap gen _) ls =
    LabelMap gen' $ M.fromList newEntries
    where
        gen'                = gen+1
        newEntries          = [ (l, (gen', fromIntegral (newIndex l))) | l <- ls ]
        newIndex l
            | labelIsVar l  = mapAdjacent l                 -- adjacency classifies variable labels
            | otherwise     = hashVal (fromIntegral gen) l  -- otherwise rehash (to disentangle collisions)
        -- mapAdjacent l       = sum (sigsOver l) `rem` hashModulus
        mapAdjacent l       = sum (sigsOver l) `combine` hashModulus -- is this a sensible replacement for `rem` MH.hashModulus        

        gls = S.toList gs

        sigsOver l          = select (hasLabel l) gls (arcSignatures lmap gls)

-- |Select is like filter, except that it tests one list to select
--  elements from a second list.
select :: ( a -> Bool ) -> [a] -> [b] -> [b]
select _ [] []           = []
select f (e1:l1) (e2:l2)
    | f e1      = e2 : select f l1 l2
    | otherwise = select f l1 l2
select _ _ _    = error "select supplied with different length lists"

-- | Return the set of distinct labels used in the graph.

graphLabels :: (Label lb) => ArcSet lb -> S.Set lb
graphLabels = getComponents arcLabels

-- TODO: worry about overflow?

-- TODO: should probably return a Set of (Int, Arc lb) or something, 
-- as may be useful for the calling code

-- | Calculate a signature value for each arc that can be used in 
--   constructing an adjacency based value for a node.  The adjacancy
--   value for a label is obtained by summing the signatures of all
--   statements containing that label.
--
arcSignatures :: 
  (Label lb) 
  => LabelMap lb -- ^ the current label index values
  -> [Arc lb] -- ^ calculate signatures for these arcs
  -> [Int] -- ^ the signatures of the arcs
arcSignatures lmap =
    map (sigCalc . arcToTriple) 
    where
        sigCalc (s,p,o)  =
            fromIntegral ( labelVal2 s +
                           labelVal2 p * 3 +
                           labelVal2 o * 5 )
            `combine` hashModulus
            -- `rem` hashModulus
          
        labelVal         = mapLabelIndex lmap
        labelVal2        = uncurry (*) . labelVal

-- | Return a new graph that is supplied graph with every node/arc
--  mapped to a new value according to the supplied function.
--
--  Used for testing for graph equivalence under a supplied
--  label mapping;  e.g.
--
--  >  if ( graphMap nodeMap gs1 ) == ( graphMap nodeMap gs2 ) then (same)
--
graphMap ::
    (Label lb)
    => LabelMap lb
    -> ArcSet lb
    -> ArcSet LabelIndex
graphMap = S.map . fmap . mapLabelIndex

-- | Compare a pair of graphs for equivalence under a given mapping
--   function.
--
--  This is used to perform the ultimate test that two graphs are
--  indeed equivalent:  guesswork in `graphMatch2` means that it is
--  occasionally possible to construct a node mapping that generates
--  the required singleton equivalence classes, but does not fully
--  reflect the topology of the graphs.

graphMapEq ::
    (Label lb) 
    => LabelMap lb
    -> ArcSet lb
    -> ArcSet lb 
    -> Bool
graphMapEq lmap = (==) `on` graphMap lmap

--------------------------------------------------------------------------------
--
--  Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin,
--    2011, 2012 Douglas Burke
--  All rights reserved.
--
--  This file is part of Swish.
--
--  Swish is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  Swish is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--  You should have received a copy of the GNU General Public License
--  along with Swish; if not, write to:
--    The Free Software Foundation, Inc.,
--    59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--
--------------------------------------------------------------------------------