Source

altgraph / altgraph / Graph.py

Full commit
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
"""
altgraph.Graph - Base Graph class
=================================

..
  #--Version 2.1
  #--Bob Ippolito October, 2004

  #--Version 2.0
  #--Istvan Albert June, 2004

  #--Version 1.0
  #--Nathan Denny, May 27, 1999
"""

from altgraph import GraphError
from collections import deque

class Graph(object):
    """
    The Graph class represents a directed graph with *N* nodes and *E* edges.

    Naming conventions:

    - the prefixes such as *out*, *inc* and *all* will refer to methods
      that operate on the outgoing, incoming or all edges of that node.

      For example: :py:meth:`inc_degree` will refer to the degree of the node
      computed over the incoming edges (the number of neighbours linking to
      the node).

    - the prefixes such as *forw* and *back* will refer to the
      orientation of the edges used in the method with respect to the node.

      For example: :py:meth:`forw_bfs` will start at the node then use the outgoing
      edges to traverse the graph (goes forward).
    """

    def __init__(self, edges=None):
        """
        Initialization
        """

        self.next_edge = 0
        self.nodes, self.edges = {}, {}
        self.hidden_edges, self.hidden_nodes = {}, {}

        if edges is not None:
            for item in edges:
                if len(item) == 2:
                    head, tail = item
                    self.add_edge(head, tail)
                elif len(item) == 3:
                    head, tail, data = item
                    self.add_edge(head, tail, data)
                else:
                    raise GraphError("Cannot create edge from %s"%(item,))


    def __repr__(self):
        return '<Graph: %d nodes, %d edges>' % (
            self.number_of_nodes(), self.number_of_edges())

    def add_node(self, node, node_data=None):
        """
        Adds a new node to the graph.  Arbitrary data can be attached to the
        node via the node_data parameter.  Adding the same node twice will be
        silently ignored.

        The node must be a hashable value.
        """
        #
        # the nodes will contain tuples that will store incoming edges,
        # outgoing edges and data
        #
        # index 0 -> incoming edges
        # index 1 -> outgoing edges

        if node in self.hidden_nodes:
            # Node is present, but hidden
            return

        if node not in self.nodes:
            self.nodes[node] = ([], [], node_data)

    def add_edge(self, head_id, tail_id, edge_data=1, create_nodes=True):
        """
        Adds a directed edge going from head_id to tail_id.
        Arbitrary data can be attached to the edge via edge_data.
        It may create the nodes if adding edges between nonexisting ones.

        :param head_id: head node
        :param tail_id: tail node
        :param edge_data: (optional) data attached to the edge
        :param create_nodes: (optional) creates the head_id or tail_id node in case they did not exist
        """
        # shorcut
        edge = self.next_edge

        # add nodes if on automatic node creation
        if create_nodes:
            self.add_node(head_id)
            self.add_node(tail_id)

        # update the corresponding incoming and outgoing lists in the nodes
        # index 0 -> incoming edges
        # index 1 -> outgoing edges

        try:
            self.nodes[tail_id][0].append(edge)
            self.nodes[head_id][1].append(edge)
        except KeyError:
            raise GraphError('Invalid nodes %s -> %s' % (head_id, tail_id))

        # store edge information
        self.edges[edge] = (head_id, tail_id, edge_data)


        self.next_edge += 1

    def hide_edge(self, edge):
        """
        Hides an edge from the graph. The edge may be unhidden at some later
        time.
        """
        try:
            head_id, tail_id, edge_data = self.hidden_edges[edge] = self.edges[edge]
            self.nodes[tail_id][0].remove(edge)
            self.nodes[head_id][1].remove(edge)
            del self.edges[edge]
        except KeyError:
            raise GraphError('Invalid edge %s' % edge)

    def hide_node(self, node):
        """
        Hides a node from the graph.  The incoming and outgoing edges of the
        node will also be hidden.  The node may be unhidden at some later time.
        """
        try:
            all_edges = self.all_edges(node)
            self.hidden_nodes[node] = (self.nodes[node], all_edges)
            for edge in all_edges:
                self.hide_edge(edge)
            del self.nodes[node]
        except KeyError:
            raise GraphError('Invalid node %s' % node)

    def restore_node(self, node):
        """
        Restores a previously hidden node back into the graph and restores
        all of its incoming and outgoing edges.
        """
        try:
            self.nodes[node], all_edges = self.hidden_nodes[node]
            for edge in all_edges:
                self.restore_edge(edge)
            del self.hidden_nodes[node]
        except KeyError:
            raise GraphError('Invalid node %s' % node)

    def restore_edge(self, edge):
        """
        Restores a previously hidden edge back into the graph.
        """
        try:
            head_id, tail_id, data = self.hidden_edges[edge]
            self.nodes[tail_id][0].append(edge)
            self.nodes[head_id][1].append(edge)
            self.edges[edge] = head_id, tail_id, data
            del self.hidden_edges[edge]
        except KeyError:
            raise GraphError('Invalid edge %s' % edge)

    def restore_all_edges(self):
        """
        Restores all hidden edges.
        """
        for edge in list(self.hidden_edges.keys()):
            try:
                self.restore_edge(edge)
            except GraphError:
                pass

    def restore_all_nodes(self):
        """
        Restores all hidden nodes.
        """
        for node in list(self.hidden_nodes.keys()):
            self.restore_node(node)

    def __contains__(self, node):
        """
        Test whether a node is in the graph
        """
        return node in self.nodes

    def edge_by_id(self, edge):
        """
        Returns the edge that connects the head_id and tail_id nodes
        """
        try:
            head, tail, data =  self.edges[edge]
        except KeyError:
            head, tail = None, None
            raise GraphError('Invalid edge %s' % edge)

        return (head, tail)

    def edge_by_node(self, head, tail):
        """
        Returns the edge that connects the head_id and tail_id nodes
        """
        for edge in self.out_edges(head):
            if self.tail(edge) == tail:
                return edge
        return None

    def number_of_nodes(self):
        """
        Returns the number of nodes
        """
        return len(self.nodes)

    def number_of_edges(self):
        """
        Returns the number of edges
        """
        return len(self.edges)

    def __iter__(self):
        """
        Iterates over all nodes in the graph
        """
        return iter(self.nodes)

    def node_list(self):
        """
        Return a list of the node ids for all visible nodes in the graph.
        """
        return list(self.nodes.keys())

    def edge_list(self):
        """
        Returns an iterator for all visible nodes in the graph.
        """
        return list(self.edges.keys())

    def number_of_hidden_edges(self):
        """
        Returns the number of hidden edges
        """
        return len(self.hidden_edges)

    def number_of_hidden_nodes(self):
        """
        Returns the number of hidden nodes
        """
        return len(self.hidden_nodes)

    def hidden_node_list(self):
        """
        Returns the list with the hidden nodes
        """
        return list(self.hidden_nodes.keys())

    def hidden_edge_list(self):
        """
        Returns a list with the hidden edges
        """
        return list(self.hidden_edges.keys())

    def describe_node(self, node):
        """
        return node, node data, outgoing edges, incoming edges for node
        """
        incoming, outgoing, data = self.nodes[node]
        return node, data, outgoing, incoming

    def describe_edge(self, edge):
        """
        return edge, edge data, head, tail for edge
        """
        head, tail, data = self.edges[edge]
        return edge, data, head, tail

    def node_data(self, node):
        """
        Returns the data associated with a node
        """
        return self.nodes[node][2]

    def edge_data(self, edge):
        """
        Returns the data associated with an edge
        """
        return self.edges[edge][2]

    def head(self, edge):
        """
        Returns the node of the head of the edge.
        """
        return self.edges[edge][0]

    def tail(self, edge):
        """
        Returns node of the tail of the edge.
        """
        return self.edges[edge][1]

    def out_nbrs(self, node):
        """
        List of nodes connected by outgoing edges
        """
        l = [self.tail(n) for n in self.out_edges(node)]
        return l

    def inc_nbrs(self, node):
        """
        List of nodes connected by incoming edges
        """
        l = [self.head(n) for n in self.inc_edges(node)]
        return l

    def all_nbrs(self, node):
        """
        List of nodes connected by incoming and outgoing edges
        """
        l = dict.fromkeys( self.inc_nbrs(node) + self.out_nbrs(node) )
        return list(l)

    def out_edges(self, node):
        """
        Returns a list of the outgoing edges
        """
        try:
            return list(self.nodes[node][1])
        except KeyError:
            raise GraphError('Invalid node %s' % node)

        return None

    def inc_edges(self, node):
        """
        Returns a list of the incoming edges
        """
        try:
            return list(self.nodes[node][0])
        except KeyError:
            raise GraphError('Invalid node %s' % node)

        return None

    def all_edges(self, node):
        """
        Returns a list of incoming and outging edges.
        """
        return set(self.inc_edges(node) + self.out_edges(node))

    def out_degree(self, node):
        """
        Returns the number of outgoing edges
        """
        return len(self.out_edges(node))

    def inc_degree(self, node):
        """
        Returns the number of incoming edges
        """
        return len(self.inc_edges(node))

    def all_degree(self, node):
        """
        The total degree of a node
        """
        return self.inc_degree(node) + self.out_degree(node)

    def _topo_sort(self, forward=True):
        """
        Topological sort.

        Returns a list of nodes where the successors (based on outgoing and
        incoming edges selected by the forward parameter) of any given node
        appear in the sequence after that node.
        """
        topo_list = []
        queue = deque()
        indeg = {}

        # select the operation that will be performed
        if forward:
            get_edges = self.out_edges
            get_degree = self.inc_degree
            get_next = self.tail
        else:
            get_edges = self.inc_edges
            get_degree = self.out_degree
            get_next = self.head

        for node in self.node_list():
            degree = get_degree(node)
            if degree:
                indeg[node] = degree
            else:
                queue.append(node)

        while queue:
            curr_node = queue.popleft()
            topo_list.append(curr_node)
            for edge in get_edges(curr_node):
                tail_id = get_next(edge)
                if tail_id in indeg:
                    indeg[tail_id] -= 1
                    if indeg[tail_id] == 0:
                        queue.append(tail_id)

        if len(topo_list) == len(self.node_list()):
            valid = True
        else:
            # the graph has cycles, invalid topological sort
            valid = False

        return (valid, topo_list)

    def forw_topo_sort(self):
        """
        Topological sort.

        Returns a list of nodes where the successors (based on outgoing edges)
        of any given node appear in the sequence after that node.
        """
        return self._topo_sort(forward=True)

    def back_topo_sort(self):
        """
        Reverse topological sort.

        Returns a list of nodes where the successors (based on incoming edges)
        of any given node appear in the sequence after that node.
        """
        return self._topo_sort(forward=False)

    def _bfs_subgraph(self, start_id, forward=True):
        """
        Private method creates a subgraph in a bfs order.

        The forward parameter specifies whether it is a forward or backward
        traversal.
        """
        if forward:
            get_bfs  = self.forw_bfs
            get_nbrs = self.out_nbrs
        else:
            get_bfs  = self.back_bfs
            get_nbrs = self.inc_nbrs

        g = Graph()
        bfs_list = get_bfs(start_id)
        for node in bfs_list:
            g.add_node(node)

        for node in bfs_list:
            for nbr_id in get_nbrs(node):
                g.add_edge(node, nbr_id)

        return g

    def forw_bfs_subgraph(self, start_id):
        """
        Creates and returns a subgraph consisting of the breadth first
        reachable nodes based on their outgoing edges.
        """
        return self._bfs_subgraph(start_id, forward=True)

    def back_bfs_subgraph(self, start_id):
        """
        Creates and returns a subgraph consisting of the breadth first
        reachable nodes based on the incoming edges.
        """
        return self._bfs_subgraph(start_id, forward=False)

    def iterdfs(self, start, end=None, forward=True):
        """
        Collecting nodes in some depth first traversal.

        The forward parameter specifies whether it is a forward or backward
        traversal.
        """
        visited, stack = set([start]), deque([start])

        if forward:
            get_edges = self.out_edges
            get_next = self.tail
        else:
            get_edges = self.inc_edges
            get_next = self.head

        while stack:
            curr_node = stack.pop()
            yield curr_node
            if curr_node == end:
                break
            for edge in sorted(get_edges(curr_node)):
                tail = get_next(edge)
                if tail not in visited:
                    visited.add(tail)
                    stack.append(tail)

    def iterdata(self, start, end=None, forward=True, condition=None):
        """
        Perform a depth-first walk of the graph (as ``iterdfs``)
        and yield the item data of every node where condition matches. The
        condition callback is only called when node_data is not None.
        """

        visited, stack = set([start]), deque([start])

        if forward:
            get_edges = self.out_edges
            get_next = self.tail
        else:
            get_edges = self.inc_edges
            get_next = self.head

        get_data = self.node_data

        while stack:
            curr_node = stack.pop()
            curr_data = get_data(curr_node)
            if curr_data is not None:
                if condition is not None and not condition(curr_data):
                    continue
                yield curr_data
            if curr_node == end:
                break
            for edge in get_edges(curr_node):
                tail = get_next(edge)
                if tail not in visited:
                    visited.add(tail)
                    stack.append(tail)

    def _iterbfs(self, start, end=None, forward=True):
        """
        The forward parameter specifies whether it is a forward or backward
        traversal.  Returns a list of tuples where the first value is the hop
        value the second value is the node id.
        """
        queue, visited = deque([(start, 0)]), set([start])

        # the direction of the bfs depends on the edges that are sampled
        if forward:
            get_edges = self.out_edges
            get_next = self.tail
        else:
            get_edges = self.inc_edges
            get_next = self.head

        while queue:
            curr_node, curr_step = queue.popleft()
            yield (curr_node, curr_step)
            if curr_node == end:
                break
            for edge in get_edges(curr_node):
                tail = get_next(edge)
                if tail not in visited:
                    visited.add(tail)
                    queue.append((tail, curr_step + 1))


    def forw_bfs(self, start, end=None):
        """
        Returns a list of nodes in some forward BFS order.

        Starting from the start node the breadth first search proceeds along
        outgoing edges.
        """
        return [node for node, step in self._iterbfs(start, end, forward=True)]

    def back_bfs(self, start, end=None):
        """
        Returns a list of nodes in some backward BFS order.

        Starting from the start node the breadth first search proceeds along
        incoming edges.
        """
        return [node for node, step in self._iterbfs(start, end, forward=False)]

    def forw_dfs(self, start, end=None):
        """
        Returns a list of nodes in some forward DFS order.

        Starting with the start node the depth first search proceeds along
        outgoing edges.
        """
        return list(self.iterdfs(start, end, forward=True))

    def back_dfs(self, start, end=None):
        """
        Returns a list of nodes in some backward DFS order.

        Starting from the start node the depth first search proceeds along
        incoming edges.
        """
        return list(self.iterdfs(start, end, forward=False))

    def connected(self):
        """
        Returns :py:data:`True` if the graph's every node can be reached from every
        other node.
        """
        node_list = self.node_list()
        for node in node_list:
            bfs_list = self.forw_bfs(node)
            if len(bfs_list) != len(node_list):
                return False
        return True

    def clust_coef(self, node):
        """
        Computes and returns the local clustering coefficient of node.  The
        local cluster coefficient is proportion of the actual number of edges between
        neighbours of node and the maximum number of edges between those neighbours.

        See <http://en.wikipedia.org/wiki/Clustering_coefficient#Local_clustering_coefficient>
        for a formal definition.
        """
        num = 0
        nbr_set = set(self.out_nbrs(node))

        if node in nbr_set:
            nbr_set.remove(node) # loop defense

        for nbr in nbr_set:
            sec_set = set(self.out_nbrs(nbr))
            if nbr in sec_set:
                sec_set.remove(nbr) # loop defense
            num += len(nbr_set & sec_set)

        nbr_num = len(nbr_set)
        if nbr_num:
            clust_coef = float(num) / (nbr_num * (nbr_num - 1))
        else:
            clust_coef = 0.0
        return clust_coef

    def get_hops(self, start, end=None, forward=True):
        """
        Computes the hop distance to all nodes centered around a specified node.

        First order neighbours are at hop 1, their neigbours are at hop 2 etc.
        Uses :py:meth:`forw_bfs` or :py:meth:`back_bfs` depending on the value of the forward
        parameter.  If the distance between all neighbouring nodes is 1 the hop
        number corresponds to the shortest distance between the nodes.

        :param start: the starting node
        :param end: ending node (optional). When not specified will search the whole graph.
        :param forward: directionality parameter (optional). If C{True} (default) it uses L{forw_bfs} otherwise L{back_bfs}.
        :return: returns a list of tuples where each tuple contains the node and the hop.

        Typical usage::

            >>> print (graph.get_hops(1, 8))
            >>> [(1, 0), (2, 1), (3, 1), (4, 2), (5, 3), (7, 4), (8, 5)]
            # node 1 is at 0 hops
            # node 2 is at 1 hop
            # ...
            # node 8 is at 5 hops
        """
        if forward:
            return list(self._iterbfs(start=start, end=end, forward=True))
        else:
            return list(self._iterbfs(start=start, end=end, forward=False))