Commits

Doug Burke committed b82c975

Add initial version of Ahn, Bagrow, & Lehmann community detectino

  • Participants
  • Parent commits d69cfc8

Comments (0)

Files changed (3)

File CommDetect.hs

+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+
+-- | Attempt to implement the community detection algorithm of
+--   Ahn, Bagrow and Lehmann,
+--   Link communities reveal multiscale complexity in networks,
+--   Nature (doi:10.1038/nature09182),
+--   <http://www.nature.com/nature/journal/v466/n7307/abs/nature09182.html>.
+--
+--   See <http://barabasilab.neu.edu/projects/linkcommunities/>.
+--
+--   TODO: tests
+--
+--   TODO: convert to a more-standard Graph representation.
+--
+--   TODO: support weights
+--
+--   TODO: a better API
+--
+--   TODO: performance
+--
+module CommDetect
+    ( NodeLabel
+    , toNodeLabel
+    , fromNodeLabel
+    , NodePair
+    , toNodePair
+    , fromNodePair
+    , Edges
+    , toEdges
+    , fromEdges
+    , findCommunities
+    ) where
+
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+import Control.Monad.State.Strict
+
+import Data.List (foldl', sortBy)
+import Data.Maybe (fromMaybe)
+import Data.Ratio (Ratio, (%))
+
+eLookup :: (Ord k, Show k) => String -> k -> M.Map k a -> a
+eLookup emsg k = fromMaybe (error ("*Internal error* " ++ emsg ++ " " ++ show k)) . M.lookup k
+
+-- | Represent a node. Create and extract with
+--   `toNodeLabel` and `fromNodeLabel` respectively.
+--
+data NodeLabel = NL { fromNodeLabel :: Int }
+   deriving (Eq, Ord, Show)
+
+-- | Create a `NodeLabel`.
+toNodeLabel :: Int -> NodeLabel
+toNodeLabel = NL
+
+-- | A pair of nodes representing an undirected connection.
+--   Use `toNodePair` and `fromNodePair` to convert
+--   to and from a @Nodepair@.
+--
+--   Note that @fromNodePair (toNodePair n1 n2)@ is
+--   @(min n1 n2, max n1 n2)@.
+--
+--   Since the connections are undirected we
+--   ensure that the first node is "before"
+--   the second, and we do not allow loops (i.e.
+--   a connection between the same node).
+--
+data NodePair = NP { fromNodePair :: (NodeLabel, NodeLabel) }
+   deriving (Eq, Ord, Show)
+
+-- | Create a connection between two nodes.
+--
+--   The routine will fail if the two nodes are the same.
+--
+toNodePair :: NodeLabel -> NodeLabel -> Maybe NodePair
+toNodePair n1 n2 = case compare n1 n2 of
+                     EQ -> Nothing
+                     LT -> Just $ NP (n1,n2)
+                     GT -> Just $ NP (n2,n1)
+
+unsafeToNP :: NodeLabel -> NodeLabel -> NodePair
+unsafeToNP n1 n2 =
+    fromMaybe
+        (error ("*Internal error* unable to convert nodes " ++ show n1 ++ "," ++ show n2 ++ " into a pair"))
+        (toNodePair n1 n2)
+
+-- | The edges in an undirected graph with no loops (i.e. no node
+--   connected to itself). Create with
+--   `toEdges` and extract with `fromEdges`.
+--
+data Edges = EG { fromEdges :: S.Set NodePair }
+             deriving (Eq, Show)
+
+toEdges :: S.Set NodePair -> Edges
+toEdges = EG
+
+-- | Connections between nodes. If @n2@ is a member of the nodes
+--   connected to @n1@ then @n1@ will be a member of the nodes
+--   connected to @n2@.
+--
+type AdjacencyList = M.Map NodeLabel (S.Set NodeLabel)
+
+toAL :: Edges -> AdjacencyList
+toAL (EG edges) = 
+    let addToMap omap (NP (u1,u2)) = M.insertWith' S.union u1 (S.singleton u2) $ M.insertWith' S.union u2 (S.singleton u1) omap
+    in S.foldl' addToMap M.empty edges
+
+-- | Return a map of the neighbors of a node, including itself.
+getNeighbours :: AdjacencyList -> M.Map NodeLabel (S.Set NodeLabel)
+getNeighbours = M.mapWithKey S.insert
+
+-- | Return all pairs from the set (assuming an undirected graph).
+--
+--   The output is not guaranteed to be ordered.
+--
+getPairs :: S.Set NodeLabel -> [NodePair]
+getPairs = map (uncurry unsafeToNP) . pairUp . S.toAscList
+
+-- | Return pairs of elements, so that
+--   @pairUp [1,2,3,4] == [(1,2), (1,3), (1,4), (2,3), (2,4), (3,4)]@.
+--
+--   If (1,2) is in the output then we do not want (2,1); we
+--   assume that the input list is ordered and unique.
+pairUp :: [a] -> [(a, a)]
+pairUp [] = []
+pairUp [_] = []
+pairUp (x:xs) = map (x,) xs ++ pairUp xs
+
+-- It is posible that the density calculation can overflow when using Int
+-- so switch to Integer. Perhaps I should just switch to Double instead?
+-- Since the similarity is calculated as a ratio of set sizes, which are
+-- both Ints, we leave this ratio as is.
+--
+type Similarity = Ratio Int
+type Density = Ratio Integer
+
+-- | It seems like this should be a Max Heap, since we are interested
+-- in those edges with higher similarity. For now just use a map.
+type SimMap = M.Map Similarity (S.Set (NodePair, NodePair))
+
+-- | Calculate the edge similarites, where the output is
+--   ordered by similarity. Since there are no weights
+--   we use the Jaccard index as the similarity measure.
+--
+--   See section 2.1.1 of the Suplementary Information of
+--   Ahn, Bagrow, & Lehman.
+--
+edgeSimilarities :: AdjacencyList -> SimMap
+edgeSimilarities amap = 
+    let ns = getNeighbours amap
+
+        -- remove those nodes that are only connected to one other node,
+        -- so they can not be a keystone node
+        filtMap = M.filter (\s -> S.size s > 1) amap
+
+        -- get all the impost nodes for each keystone node
+        pairsMap = M.map getPairs filtMap
+
+        -- calculate the similarity of this node triple
+        -- (two impost nodes which are both connected to
+        -- the keystone node).
+        procPair :: 
+            NodeLabel    -- ^ the keystone node
+            -> SimMap    -- ^ input similarity map
+            -> NodePair  -- ^ the impost nodes
+            -> SimMap    -- ^ updated similarity map
+        procPair n orig (NP (u1,u2)) = 
+            let orderPair :: (Ord a) => (a, a) -> (a, a)
+                orderPair o@(a,b) = if a > b then (b,a) else o
+
+                edge = orderPair (unsafeToNP u1 n, unsafeToNP u2 n)
+                emsg = "missing node connection for node " ++ show n
+                getSet u = eLookup emsg u ns
+                n1   = getSet u1 -- ^ inclusive neighbors (n_+(i))
+                n2   = getSet u2 -- ^ inclusive neighbors (n_+(j))
+
+                -- calculate the Jaccard index
+                sim  = S.size (n1 `S.intersection` n2) % S.size (n1 `S.union` n2)
+            in M.insertWith' S.union sim (S.singleton edge) orig
+
+        -- process all pairs of impost nodes for the given keystone node.
+        proc :: SimMap -> NodeLabel -> [NodePair] -> SimMap
+        proc orig n = foldl' (procPair n) orig
+
+    in M.foldlWithKey' proc M.empty pairsMap
+
+type Community = Int
+
+data PartitionState =
+    PartitionState
+    {
+      psDensity       :: Density
+                         -- ^ this is the partition density (equation 3)
+                         --   *without* the leading 2/M term
+    , psCommunity     :: M.Map NodePair Community
+                         -- ^ what community is this edge in?
+    , psEdges         :: M.Map Community (S.Set NodePair)
+                         -- ^ what edges are in the community?
+    , psNodes         :: M.Map Community (S.Set NodeLabel)
+                         -- ^ what nodes are in the community?
+    } deriving (Eq, Show)
+
+-- | What community does the node pair belong to? We assume that
+--   the edge exists in the graph.
+getCommunity :: PartitionState -> NodePair -> Community
+getCommunity PartitionState {..} edge = 
+    eLookup "unknown edge" edge psCommunity
+
+-- | What edges belong to the community? We assume that the
+--   community exists.
+getCommunityEdges :: PartitionState -> Community -> S.Set NodePair
+getCommunityEdges PartitionState {..} c = 
+    eLookup "unknown community (edges)" c psEdges
+
+-- | What nodes belong to the community? We assume that the
+--   community exists.
+getCommunityNodes :: PartitionState -> Community -> S.Set NodeLabel
+getCommunityNodes PartitionState {..} c = 
+    eLookup "unknown community (nodes)" c psNodes
+
+-- | What is the link density for the community (given by the
+--   edges in the community and the nodes in the community).
+--
+--   We calculate the per-community part of Equation 3, rather
+--   than equation 2, since we wish to calculate the partition
+--   density. The normalisation by 2/M is done elsewhere.
+--
+calcCommunityDensity :: S.Set NodePair -> S.Set NodeLabel -> Density
+calcCommunityDensity es ns = 
+    let m = fromIntegral $ S.size es -- number of links in the community
+        n = fromIntegral $ S.size ns -- number of induced nodes
+
+    in if n > 2 then m * (m - n + 1) / ((n - 2) * (n - 1)) else 0
+
+-- Merge the two communities, attempting to merge smaller into larger
+-- as that is more efficient for S.union (as it uses the
+-- hedge-union algorithm).
+--
+doCommunityMerge :: 
+    PartitionState  -- ^ input state
+    -> Community       -- ^ community to merge
+    -> Community       -- ^ community to merge
+    -> PartitionState  -- ^ merged communities (and the id of the smaller one removed)
+doCommunityMerge ps@(PartitionState {..}) c1 c2 =
+    -- x1 and x2 refer to properties of community 1 and 2.
+    -- x1' and x2' are the same but this time we have ensured that
+    --   community 1' is larger than 2' (for merging)
+    -- values without a suffix or ' are intended to be the
+    --   merged value
+    --
+    let e1 = getCommunityEdges ps c1
+        e2 = getCommunityEdges ps c2
+             
+        -- to ensure that the unions are efficient, we want the
+        -- largest set first
+        (c1', c2', e1', e2') = if S.size e2 > S.size e1 then (c2, c1, e2, e1) else (c1, c2, e1, e2)
+
+        -- merge the edges
+        edges = e1' `S.union` e2'
+
+        -- merge the nodes from the newly-added edges into the node set for this
+        -- community
+        n1' = getCommunityNodes ps c1'
+        n2' = getCommunityNodes ps c2' -- only needed for the density calculation
+        nodeList2' = S.foldl' (\s (NP (n1,n2)) -> S.insert n1 $ S.insert n2 s) S.empty e2'
+        nodes = if S.size n1' >= S.size nodeList2' then n1' `S.union` nodeList2' else nodeList2' `S.union` n1'
+
+        -- update the community map to move all the edges in c2' into c1'
+        moveCommunity :: M.Map NodePair Community -> NodePair -> M.Map NodePair Community
+        moveCommunity orig e = M.adjust (const c1') e orig
+
+        cmap = S.foldl' moveCommunity psCommunity e2'
+        emap = M.delete c2' $ M.adjust (const edges) c1' psEdges
+        nmap = M.delete c2' $ M.adjust (const nodes) c1' psNodes
+
+        -- add in the density for the new community and remove the
+        -- contribution from the old communities.
+        deltaDensity = calcCommunityDensity edges nodes 
+                       - calcCommunityDensity e1' n1'
+                       - calcCommunityDensity e2' n2'
+
+    in PartitionState (psDensity + deltaDensity) cmap emap nmap
+
+type CommunityState = State PartitionState
+
+-- | If the two edges are not in the same community, merge the two
+--   communities, otherwise do nothing.
+mergeEdgeCommunities :: (NodePair, NodePair) -> CommunityState ()
+mergeEdgeCommunities (e1,e2) = do
+  ps <- get
+  let c1 = getCommunity ps e1
+      c2 = getCommunity ps e2
+  unless (c1 == c2) $ put $ doCommunityMerge ps c1 c2
+
+-- | Merge the edges at the given similarity level.
+--
+mergePairs :: 
+    Density   -- ^ normalization term (2/M) of equation 3
+    -> (Similarity, S.Set (NodePair, NodePair)) -- ^ the node triples for the given similarity level
+    -> CommunityState (Similarity, Density, M.Map Community (S.Set NodePair))
+       -- ^ The similarity, density *before* the merge, and community map
+       --   *before* the merge
+mergePairs norm (sim, pairs) = do
+  PartitionState {..} <- get
+  mapM_ mergeEdgeCommunities $ S.toList pairs
+  return (sim, norm * psDensity, psEdges)
+
+-- | Create the starting state, where each edge belongs to its own
+--   community.
+--
+startEdges :: S.Set NodePair -> (M.Map NodePair Community, M.Map Community (S.Set NodePair), M.Map Community (S.Set NodeLabel))
+startEdges es = 
+    let cmap = M.fromAscList $ zip (S.toAscList es) [1..]
+        nodepair om np comm = M.insertWith' const comm (S.singleton np) om
+        nodelabel om (NP (n1,n2)) comm = M.insertWith' const comm (S.fromList [n1,n2]) om
+        f g = M.foldlWithKey' g M.empty cmap
+    in (cmap, f nodepair, f nodelabel)
+
+-- | Return (a,b) pairs and the values at the peak.
+--
+--   This is very ugly. Is it really worth it?
+findPeak :: 
+    Ord b 
+    => [(a, b, c)]
+    -> Maybe ([(a, b)], (a,b), c)
+findPeak [] = Nothing
+findPeak ((a1,b1,c1):xs) = 
+    let go [] rs amax bmax cmax = (reverse rs, (amax,bmax), cmax)
+        go ((a,b,c):os) rs amax bmax cmax | b >= bmax = go os ((a,b):rs) a b c
+                                          | otherwise = go os ((a,b):rs) amax bmax cmax
+
+                                             {- If we want to find the first "peak"
+                                              let go2 [] rs' = (reverse rs', (amax,bmax), cmax)
+                                                  go2 ((a',b',_):os') rs' = go2 os' ((a',b'):rs')
+                                              in go2 os ((a,b):rs)
+                                              -}
+    in Just $ go xs [(a1,b1)] a1 b1 c1
+
+-- | Create the partitions given the similarities. To restrict to a given
+--   threshold then @takeWhile ((>=threshold). fst)@ the input list.
+--
+--   This may not handle all the edge cases you can get.
+--
+--   The results do not quite match the Python code from
+--   http://barabasilab.neu.edu/projects/linkcommunities/, primarily in that
+--   their results seem to include an extra (1.0,0.0) at the head of the
+--   Similarity/Density list.
+--
+--   Note that the community labels (here integers) are not guaranteed to
+--   match that of the Ahn et al. code.
+--   
+mergeSimilarities :: 
+    Edges      -- ^ graph
+    -> SimMap  -- ^ similarity values for all node triples in the graph
+    -> Maybe ([(Similarity, Density)], (Similarity,Density),  M.Map Community (S.Set NodePair))
+       -- ^ returns Nothing for an empty graph, otherwise a list of the density values
+       --   as a function of similarity, the best (Similarity, Density), and the
+       --   community map at this point.
+mergeSimilarities (EG edges) simMap = 
+    -- I think I am missing a trick here, in that there is a "shift" between the work done
+    -- in a loop and the values that are returned from the loop (e.g. note the *before*
+    -- caveats in mergePairs) and the need to augment the return values with a 0-similarity
+    -- level result.
+    let (icoms, iedges, inodes) = startEdges edges
+        s0 = PartitionState 0 icoms iedges inodes
+        norm = 2 % toInteger (S.size edges)
+        (xs, s1) = runState (mapM (mergePairs norm) (M.toDescList simMap)) s0
+    in findPeak $ xs ++ [(0.0, norm * psDensity s1, psEdges s1)]
+
+-- Reverse the order of the sort (at present I do not have a modern
+-- enough base to have Data.Ord.Down).
+rcomparing :: Ord b => (a,b) -> (a,b) -> Ordering
+rcomparing (_,b1) (_,b2) | b1 > b2   = LT
+                         | b1 < b2   = GT
+                         | otherwise = EQ
+
+-- Renumber the communities so that
+--   a) go from 1 to ncommunities
+--   b) they are ordered by number of edges in the community (in
+--      descending order)
+--
+renumber :: M.Map Community (S.Set b) -> M.Map Community (S.Set b)
+renumber orig = 
+    let oldcomms = map fst $ sortBy rcomparing $ M.assocs $ M.map S.size orig
+        -- for now assume the number of communities is small enough that
+        -- an association list is sufficient
+        conv = zip oldcomms [1..]
+        keyConv k = fromMaybe (error ("*Internal error* missing key " ++ show k)) $ lookup k conv
+    in M.mapKeys keyConv orig
+
+-- | Given a set of edges, return the community information at the optimal theshold.
+--
+--   This is an implementation of the algorithm described in 
+--   Ahn, Bagrow and Lehmann,
+--   Link communities reveal multiscale complexity in networks,
+--   Nature (doi:10.1038/nature09182).
+--
+--   At present only unweighted and undirected graphs are supported.
+--
+findCommunities :: 
+    Edges -- ^ The input graph.
+    -> Maybe ([(Similarity, Density)], (Similarity,Density),  M.Map Community (S.Set NodePair))
+       -- ^ returns Nothing for an empty graph, otherwise a list of the density values
+       --   as a function of similarity, the best (Similarity, Density), and the
+       --   community map at this point. The communities are numbered by descending
+       --   number of edges in the community (with community nnumbers starting at 1).
+findCommunities es = 
+    let sim = edgeSimilarities $ toAL es
+    in case mergeSimilarities es sim of
+         Nothing -> Nothing
+         Just (a,b,c) -> Just (a, b, renumber c)
+
+{-
+-- TEST
+
+hack :: Int -> Int -> NodePair
+hack a b = unsafeToNP (NL a) (NL b)
+
+p12 = hack 1 2
+p13 = hack 1 3
+p24 = hack 2 4
+p310 = hack 3 10
+p41 = hack 4 1
+p42 = hack 4 2
+
+p410 = hack 4 10
+p32 = hack 3 2
+p54 = hack 5 4
+p67 = hack 6 7
+
+edges1, edges2, edges3, edges4 :: Edges
+edges1 = EG $ S.fromList [p12, p13, p24, p310, p41, p42]
+edges2 = EG $ S.fromList [p12, p13, p24, p310, p41, p42, p410, p32]
+edges3 = EG $ S.fromList [p12, p13, p54, p67]
+edges4 = EG $ S.fromList [p12, p54, p67]
+
+-- al, al2, al3 :: AdjacencyList
+-- al = toAL edges1
+-- al2 = toAL edges2
+-- al3 = toAL edges3
+-- al4 = toAL edges4
+
+-}

File GetUserConnections.hs

   ./getuserconnections csv <endpoint>
   ./getuserconnections json <endpoint>
   ./getuserconnections sif <endpoint>
+  ./getuserconnections communities <endpoint>
 
 Aim:
 
 assukming the count value is one of a small number of values so it can be
 considered to be a small set of labels for v 1.0.0 of biofabric).
 
+The communities argument displays debugging output related to the
+community detection.
+
 When no argument is given an "unspecified" screen output is used
 (for debugging).
 
 import qualified Data.Set as S
 import qualified Data.Text as T
 
-import Database.HaSparqlClient (Query, NamedGraph, BindingValue(..))
+-- import Control.Monad (forM_)
 
 import Data.Hashable (Hashable(..))
 import Data.List (foldl')
-import Data.Maybe (fromMaybe, fromJust)
+import Data.Maybe (fromJust, fromMaybe, mapMaybe)
 import Data.Time (UTCTime)
+-- import Data.Tuple (swap)
+
+import Database.HaSparqlClient (Query, NamedGraph, BindingValue(..))
 
 import System.Environment (getArgs, getProgName)
 import System.Exit (exitFailure)
 
 import Data.Aeson
 
+import CommDetect ( NodeLabel, NodePair
+                  -- , fromNodePair
+                  , toNodePair, toNodeLabel, toEdges
+                  , findCommunities)
 import SPARQL ( BasicUserInfo(..), BasicTweetCount(..)
               , FollowerInfo(..)
               , UserId
 #endif
   hashWithSalt s = hashWithSalt s . fromUserId
 
+type Community = Int
+
 {-
 Find out who references who.
 -}
   , "} GROUP BY ?cid ?pid"
   ]
 
+-- | Find what community an edge is in
+getCommunityMap :: M.Map Community (S.Set NodePair) -> M.Map NodePair Community
+getCommunityMap =
+    let addSet orig comm pairs = S.foldl' (\o np -> M.insert np comm o) orig pairs
+    in M.foldlWithKey' addSet M.empty
+
 {-
 Run the query against the given endpoint, collecting
 up the results.
 new info (i.e. we will not get connection information
 for users we do not know about).
 -}
-queries :: String -> IO ([UserConn], (UTCTime, UTCTime))
+queries :: String -> IO ([UserConn], Int, (UTCTime, UTCTime))
 queries endpoint = do
   stores <- queryStore endpoint
   (firstTime, lastTime) <- getTimeRange endpoint stores
   followInfo <- getNumberFollowInfo endpoint stores
   
   let um = makeUserMap buinfo followInfo ntweets
-  return (map (toUserConn um) conns, (firstTime, lastTime))
+      times = (firstTime, lastTime)
+
+      -- identify communities
+      getU (u1,u2,_,_) = [u1,u2]
+      userPairs = zip (S.toAscList $ S.fromList $ concatMap getU conns) $ map toNodeLabel [1..]
+      users = M.fromDistinctAscList userPairs
+      -- rusers = M.fromList $ map swap userPairs
+      userIndex u = fromMaybe (error ("*Internal error* unknown user " ++ show u)) $ M.lookup u users
+      -- getName i = fromMaybe (error ("*Internal error* unknown user label " ++ show i)) $ M.lookup i rusers
+
+      toNP (u1,u2,_,_) = toNodePair (userIndex u1) (userIndex u2)
+
+      e = toEdges $ S.fromList $ mapMaybe toNP conns
+      (ncomms, commMap) = case findCommunities e of
+                            Nothing -> (0, M.empty)
+                            Just (_,_,cmap) -> (M.size cmap, getCommunityMap cmap)
+
+  return (map (toUserConn um commMap userIndex) conns, ncomms, times)
   
 data UserInfo = 
   UI 
     UC
     { ucFrom      :: UserInfo  -- ^ Account that wrote the tweets
     , ucTo        :: UserInfo  -- ^ Account that was mentioned
+    , ucCommunity :: Int       -- ^ community number (or 0 if no community; e.g. ucFrom==ucTo)
     , ucNTweets   :: Int       -- ^ Number of all tweets
     , ucNRetweets :: Int       -- ^ Number of retweets
     } deriving (Eq, Show)
 
 -- could easily make this return Maybe UserConn instead but for now
 -- just let it error out
-toUserConn :: UserMap -> (UserId, UserId, Int, Int) -> UserConn
-toUserConn um (cid, pid, nt, nrt) = UC (toUserInfo um cid) (toUserInfo um pid) nt nrt
+toUserConn :: UserMap -> M.Map NodePair Community -> (UserId -> NodeLabel) -> (UserId, UserId, Int, Int) -> UserConn
+toUserConn um cmap nlbl (cid, pid, nt, nrt) = 
+    let u1 = toUserInfo um cid 
+        u2 = toUserInfo um pid
+        commid = case toNodePair (nlbl cid) (nlbl pid) of
+                   Nothing -> 0
+                   Just np -> fromMaybe 0 $ M.lookup np cmap
+    in UC u1 u2 commid nt nrt
 
 toUserInfo :: UserMap -> UserId -> UserInfo
 toUserInfo um uid =
 
 The ordering for users is currently unspecified.
 -}
-displayJSON :: ([UserConn], (UTCTime, UTCTime)) -> IO ()
-displayJSON (us, ts) = 
+displayJSON :: ([UserConn], Int, (UTCTime, UTCTime)) -> IO ()
+displayJSON (us, ncomm, ts) = 
   let uSet = foldl' (\s1 UC {..} -> S.insert ucTo (S.insert ucFrom s1)) S.empty us
       users = S.toList uSet
       uMap = zip users [0::Int ..]
                , "target"  .= getUserIndex ucTo
                , "ntweets" .= ucNTweets
                , "nretweets" .= ucNRetweets
+               , "community" .= ucCommunity
                ]
         
       j = encode $ object 
           [ "firstTweet" .= fst ts
           , "lastTweet" .= snd ts
+          , "numCommunities" .= ncomm
           , "nodes" .= users
           , "links" .= map mkLink us ]
       
   in LB.putStrLn j
 
+fst3 :: (a,b,c) -> a
+fst3 (a,_,_) = a
+
 {-
 The CSV-like format used by Gephi, namely
 
 
 For now we do not display the retweet count
 -}
-displayCSV :: ([UserConn], a) -> IO ()
+displayCSV :: ([UserConn], a, b) -> IO ()
 displayCSV = 
   let asCSV UC {..} = T.unpack (getCSVLabel ucFrom) ++ "," ++ T.unpack (getCSVLabel ucTo) ++ ";" ++ show ucNTweets
-  in mapM_ (putStrLn . asCSV) . fst
+  in mapM_ (putStrLn . asCSV) . fst3
   
-displaySIF :: ([UserConn], a) -> IO ()
+displaySIF :: ([UserConn], a, b) -> IO ()
 displaySIF = 
   let asSIF UC {..} = T.unpack (getCSVLabel ucFrom) ++ "\t" ++ show ucNTweets ++ "\t" ++ T.unpack (getCSVLabel ucTo)
-  in mapM_ (putStrLn . asSIF) . fst
-  
+  in mapM_ (putStrLn . asSIF) . fst3
+
+-- Ideally would like to store the community information somewhere
+displayCommunities :: ([UserConn], Int, a) -> IO ()
+displayCommunities (ucs, ncomm, _) = 
+    if ncomm == 0
+    then putStrLn "No communities found!"
+    else do
+      putStrLn $ "*** Found " ++ show ncomm ++ " communities"
+      mapM_ (putStrLn . show) ucs 
+
+{-
+         Just (dlist, dmax, cmap) -> do
+                                putStrLn $ "*** Found " ++ show (M.size cmap) ++ " communities"
+                                putStrLn $ "*** Best Similarity,Density = " ++ show dmax
+                                putStrLn $ "*** (S,D) pairs = " ++ show dlist
+                                forM_ (M.assocs cmap) $ \(comm, pairs) ->
+                                    let getNames np = let (a,b) = fromNodePair np
+                                                      in (uiHandle (getName a), uiHandle (getName b))
+                                        ns = map getNames $ S.toAscList pairs
+                                    in putStrLn $ " " ++ show comm ++ " : " ++ show ns
+-}
+                                
 {-
 Just dump the output.
 -}
-displayDebug :: ([UserConn], a) -> IO ()
-displayDebug = mapM_ print . fst
+displayDebug :: ([UserConn], a, b) -> IO ()
+displayDebug = mapM_ print . fst3
 
 usage :: IO ()
 usage = do
     (a:endpoint:[]) | a == "json" -> queries endpoint >>= displayJSON
                     | a == "csv"  -> queries endpoint >>= displayCSV
                     | a == "sif"  -> queries endpoint >>= displaySIF
+                    | a == "communities"  -> queries endpoint >>= displayCommunities
     _ -> usage

File astrosearch.cabal

 
 Executable       getuserconnections
   Main-Is:       GetUserConnections.hs
-  Other-Modules: SPARQL
+  Other-Modules: CommDetect
+                 SPARQL
                  Utils
 
   ghc-options: -Wall -fno-warn-orphans
     hasparql-client >= 0.3.1 && < 0.4,
     http-conduit == 1.8.*,
     old-locale == 1.0.*,
+    mtl == 2.1.*,
     network == 2.3.*,
     swish == 0.9.*,
     text == 0.11.*,