Commits

Doug Burke committed 49ad17a

Added basic information on the "publisher" of each tweet

Comments (0)

Files changed (1)

     , [Counter UserId]   -- ^ the most-mentioned accounts
     , [Counter String]   -- ^ the most-mentioned hash tags
     , [Counter URIInfo]  -- ^ the most-mentioned URLs
+    , [Counter T.Text]   -- ^ the most-used publishers
     , [(Int, Int, Int, Int)] -- ^ num friends, num followers, total num of tweets, num of retweets
     , M.Map UserId BasicUserInfo -- ^ map from user id to user labels
     , UTCTime -- ^ approximate time of first tweet (or when the search was made)
   , "} GROUP BY ?userid"
   ]
 
+-- | Build the SPARQL query that returns, for each publisher, the
+--   number of @sioct:MicroblogPost@ items that name it as their
+--   @dcterms:publisher@.
+queryPublisherCount :: [NamedGraph] -> Query
+queryPublisherCount ngs = unwords (prefixes ++ selectClause ++ whereClause)
+  where
+    prefixes =
+      [ "prefix sioct: <http://rdfs.org/sioc/types#>"
+      , "prefix dcterms: <http://purl.org/dc/terms/>"
+      ]
+
+    -- NOTE(review): fromStores presumably renders the FROM clauses
+    -- for the given named graphs - confirm against its definition.
+    selectClause =
+      [ "SELECT ?publisher (COUNT(?tw) as ?ntweets) "
+      , fromStores ngs
+      ]
+
+    whereClause =
+      [ " WHERE {"
+      , "  ?tw a sioct:MicroblogPost ; dcterms:publisher ?publisher ."
+      , "} GROUP BY ?publisher"
+      ]
+
+{-
+As I have not declared a "type" for publishers, we have to identify
+them with the constraint
+   [] a sioct:MicroblogPost ; dcterms:publisher ?publisher
+(just "[] dcterms:publisher ?publisher" would also work, but the
+above is a bit more specific). This means that we get back a label
+for each tweet, even when the labels are identical, so GROUP_CONCAT
+can not be used, since it would repeat the same label once per tweet.
+Instead, use SAMPLE to pick just one label per publisher.
+-}
+-- | Build the SPARQL query that pairs each publisher of a
+--   @sioct:MicroblogPost@ with one of its @rdfs:label@ values;
+--   @SAMPLE@ picks a single label per publisher.
+queryPublisherInfo :: [NamedGraph] -> Query
+queryPublisherInfo ngs =
+  unwords $ concat [prefixDecls, selectPart, wherePart]
+  where
+    prefixDecls =
+      [ "prefix sioct: <http://rdfs.org/sioc/types#>"
+      , "prefix dcterms: <http://purl.org/dc/terms/>"
+      , "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>"
+      ]
+
+    selectPart =
+      [ "SELECT ?publisher (SAMPLE(?labels) as ?label) "
+      , fromStores ngs
+      ]
+
+    -- publishers are found via the tweets that reference them
+    wherePart =
+      [ " WHERE {"
+      , "  [] a sioct:MicroblogPost ; dcterms:publisher ?publisher ."
+      , "  ?publisher rdfs:label ?labels ."
+      , "} GROUP BY ?publisher"
+      ]
+
+
 -- | Read an 'Int' from the second binding of a two-element result
 --   row, using 'fromBinding'. A row of any other length gives
 --   'Nothing'.
 getCount :: [BindingValue] -> Maybe Int
 getCount row = case row of
   [_, countBinding] -> fromBinding countBinding
   _                 -> Nothing
   reTweetTimes     <- makeQuery toDelta   endpoint (queryTimeToRetweet stores)
   followerCount    <- makeQuery (toFollowerCount countMap) endpoint (queryFollowerCount stores)
 
+  publisherCount   <- makeQuery toCounter endpoint (queryPublisherCount stores)
+  publisherInfo    <- makeQuery to2       endpoint (queryPublisherInfo stores)
+
   uinfo            <- getBasicUserInfo endpoint stores
 
   let f :: (Ord a) => [Counter a] -> [Counter a]
       rtwDelta10MinHist = calcHist 600.0 reTweetTimes
       rtwDelta6HourHist = calcHist 21600.0 reTweetTimes
 
+      -- replace URI by label for publishers; we assume that there is
+      -- always a label
+      convPublisher :: N.URI -> T.Text
+      convPublisher = fromJust . (`lookup` publisherInfo)
+      pCount = map (fmap convPublisher) publisherCount
+      
   return ( M.size countMap
          , M.size onlyRetweetMap
          , ntws, nrtws
          , toHistogram userRetweetCount
          , rtwDelta10MinHist
          , rtwDelta6HourHist
-         , ftws, f repliedUser, f garrulousUser, f retweetUser, f mentionedUser, f mentionedHashTag, f urls, followerCount, uinfo, firstTime, lastTime)
+         , ftws, f repliedUser, f garrulousUser, f retweetUser
+         , f mentionedUser, f mentionedHashTag, f urls
+         , f pCount
+	 , followerCount
+         , uinfo, firstTime, lastTime)
 
 {-
 Instead of a KDE, just create a histogram.
 displayCount :: 
     QueryResponse
     -> IO ()
-displayCount (n, nrtonly, ntws, nrtws, rtwHist, utwHist, urtwHist, _, _, rtStatus, repliedUser, garrulousUser, rtUser, mUser, mHT, mURL, _, uMap, ftime, ltime) = do
+displayCount (n, nrtonly, ntws, nrtws, rtwHist, utwHist, urtwHist, _, _, rtStatus, repliedUser, garrulousUser, rtUser, mUser, mHT, mURL, mPub, _, uMap, ftime, ltime) = do
   putStrLn $ ">> Number of tweeters: " ++ show n
   putStrLn $ ">> Number of tweeters that *only* retweeted: " ++ show nrtonly
   putStrLn $ ">> First tweet: " ++ show ftime
   forM_ urtwHist $ \(k,v) -> when (v > 0) (putStrLn $ "Num tweets = " ++ show k ++ " " ++ show v ++ " times")
   putStrLn "*** Most mentioned URL"
   forM_ mURL $ \(Counter u cnt) -> putStrLn $ show cnt ++ " <" ++ show u ++ ">"
+  putStrLn "*** Most used publisher"
+  forM_ mPub $ \(Counter p cnt) -> putStrLn $ show cnt ++ " " ++ T.unpack p
 
 -- | Replace the id of a user with a human-readable label
 replaceUserId :: M.Map UserId BasicUserInfo -> Counter UserId -> Counter String
 displayAsJSON ::
     QueryResponse
     -> IO ()
-displayAsJSON (n, nrtonly, ntws, nrtws, rtwHist, utwHist, urtwHist, rtwDelta10MinHist, rtwDelta6HourHist, rtStatus, repliedUser, garrulousUser, rtUser, mUser, mHT, mURL, friendCount, uMap, fTime, lTime) = do
+displayAsJSON (n, nrtonly, ntws, nrtws, rtwHist, utwHist, urtwHist, rtwDelta10MinHist, rtwDelta6HourHist, rtStatus, repliedUser, garrulousUser, rtUser, mUser, mHT, mURL, mPub, friendCount, uMap, fTime, lTime) = do
   let fixUser = map (replaceUserId uMap)
 
       toF (nfr,nfoll,nt,nrt) = object [ "follow" .= nfr
                    , "retweetDeltaHist6Hour" .= rtwDelta6HourHist
                    , "uris" .= mURL
                    , "hashtag" .= mHT
+                   , "publisher" .= mPub
                    , "followerCount" .= map toF friendCount
                    ]