Source

brightway2-analyzer / bw2analyzer / page_rank.py

from brightway2 import Database
from bw2calc import LCA
from numpy import array, ones, absolute, dot, where


class ConvergenceError(StandardError):
    pass


class PageRank(object):
    def __init__(self, database):
        self.database = database
        self.process = Database(database).load().keys()[0]

    def calculate(self):
        self.lca = LCA({self.process: 1})
        self.lca.lci()
        self.rev_dict, dummy = self.lca.reverse_dict()
        self.matrix = self.lca.technosphere_matrix.data.transpose()
        self.pr = [(x[0], self.rev_dict[x[1]]) for x in self.page_rank(
            self.matrix)]
        return self.pr

    def page_rank(self, technosphere, alpha=0.85, max_iter=100, tol=1e-6):
        """
Return the PageRank of the nodes in the graph.

Adapted from http://networkx.lanl.gov/svn/networkx/trunk/networkx/algorithms/link_analysis/pagerank_alg.py

PageRank computes a ranking of the nodes in the graph G based on
the structure of the incoming links. It was originally designed as
an algorithm to rank web pages.

The eigenvector calculation uses power iteration with a SciPy
sparse matrix representation.

Args:
    * *technosphere* (scipy sparse matrix): The technosphere matrix.
    * *alpha* (float, optional): Damping parameter for PageRank, default=0.85

Returns:
    * Dictionary of nodes (activity codes) with value as PageRank


References

.. [1] A. Langville and C. Meyer,
   "A survey of eigenvector methods of web information retrieval."
   http://citeseer.ist.psu.edu/713792.html
.. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
   The PageRank citation ranking: Bringing order to the Web. 1999
   http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
        """
        mat = technosphere.copy()
        (n, m) = mat.shape
        assert n == m  # should be square
        nodelist = range(n)

        # Drop diagonals, and only indicate adjacency
        mat.data[:] = 1
        for x in xrange(n):
            mat[x, x] = 0

        column_sum = array(mat.sum(axis=1)).flatten()
        index = where(column_sum != 0)[0]
        mat = mat.tolil()
        for i in index:
            # Workaround for lack of fancy indexing in CSR matrices
            mat[i, :] *= 1.0 / column_sum[i]

        mat = mat.tocsc()
        x = ones((n)) / n  # initial guess
        dangle = array(where(mat.sum(axis=1) == 0, 1.0 / n,
            0)).flatten()
        i = 0

        while True:  # power iteration: make up to max_iter iterations
            xlast = x
            x = alpha * (x * mat + dot(dangle, xlast)) + (1 - alpha
                ) * xlast.sum() / n
            # check convergence, l1 norm
            err = absolute(x - xlast).sum()
            if err < n * tol:
                break
            if i > max_iter:
                raise ConvergenceError("pagerank: power iteration "
                    "failed to converge in %d iterations." % (i + 1))
            i += 1

        return sorted(zip(x, nodelist), reverse=True)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.