Source

bubble-economy / spanning_tree.py

Full commit
from heapq import heappush, heappop
import math
import numpy as np
from scipy.spatial.distance import squareform

def spanning_weight(weights, variances=None):
    """determing the sum of edge weights in a maximum spanning tree for a
    connected undirected graph, using Prim's algorithm. sum (independent)
    variances, if supplied."""
    weights = np.asarray(weights)
    if variances is not None:
        variances = np.asarray(variances)

    if (weights.ndim==2 and weights.shape[0]==weights.shape[1]):
        #valid square distance matrix
        pass
    else:
        #hmmmm- must be condensed-form? Convert to squareform.
        weights = squareform(weights.ravel())
        if variances is not None:
            variances = squareform(variances.ravel())
            
    num_nodes = weights.shape[0]
    path = -np.ones(((num_nodes-1), 2), dtype=int)
    #initialise our adjacency matrix. initially, we only ignore the diagonal
    masked_weights = np.ma.masked_array(
        weights,
        mask=np.eye(num_nodes, dtype=np.bool),
        fill_value=-np.inf)
    unavailable_rows = np.ones(num_nodes, dtype=np.bool)

    #initialise our spanning tree with node 0
    path_weight = 0.
    path_variance = 0.
    unavailable_rows[0] = False
    node_count = 1
    
    #walk through list augmenting the tree when we find edges that connect
    #the tree to a node outside    
    for edge in xrange(0, num_nodes-1):
        #find the next candidate
        candidate_idxs = np.ma.array(
          masked_weights.argmax(1),
          mask=unavailable_rows,
          fill_value=-np.inf)
        candidate_vals = np.ma.array(
          masked_weights.max(1),
          mask=unavailable_rows,
          fill_value=-np.inf)
        winning_tree_node = candidate_vals.argmax()
        winning_new_node = candidate_idxs[winning_tree_node]
        path_weight += candidate_vals[winning_tree_node]
        node_count += 1
        unavailable_rows[winning_new_node] = False
        masked_weights.mask[winning_tree_node, winning_new_node] = True
        masked_weights.mask[winning_new_node, winning_tree_node] = True
        if variances is not None:
            path_variance += variances[winning_tree_node, winning_new_node]
        path[edge][:] = [winning_new_node, winning_tree_node]
    ret = (path_weight,)
    if variances is not None:
        ret += (path_variance,)
    ret += (node_count,)
    
    return ret
    
def squareform_size(m):
    """Given a condensed list of m=n \choose{} 2 pairwise distances, it helps
    to know n."""
    num_nodes = (1+math.sqrt(1+8*m))/2
    int_num_nodes = int(num_nodes)
    if int_num_nodes != num_nodes:
        raise ValueError(
          "%d is not a combinatorially possible length"
          " for a distance matrix" % m)
    return int_num_nodes

### C version of squareform calculations
# void dist_to_squareform_from_vector(double *M, const double *v, int n) {
#   double *it;
#   const double *cit;
#   int i, j;
#   cit = v;
#   for (i = 0; i < n - 1; i++) {
#     it = M + (i * n) + i + 1;
#     for (j = i + 1; j < n; j++, it++, cit++) {
#       *it = *cit;
#     }
#   }
# }
# 
# void dist_to_vector_from_squareform(const double *M, double *v, int n) {
#   double *it;
#   const double *cit;
#   int i, j;
#   it = v;
#   for (i = 0; i < n - 1; i++) {
#     cit = M + (i * n) + i + 1;
#     for (j = i + 1; j < n; j++, it++, cit++) {
#       *it = *cit;
#     }
#   }
# }