Commits

Doug Burke committed 8fb833b

Update to hasparql-client 0.3.0 to support named graphs via protocol (code not fully updated to use named graphs and the protocol-level specification does not seem to work)

Comments (0)

Files changed (9)

 
 {-
 
+WARNING: this code has not been updated to use named graphs yet as
+I have not worked out how best to merge the data given the rewrite
+rules we have.
+
 Usage:
 
   calchistogram <ingraph>
 tweet times and the re-tweet times.
 -}
 query :: String -> IO (UV.Vector Double, UV.Vector Double)
-query endpoint = splitVals `liftM` makeQuery getVals endpoint queryTweets
+query endpoint = splitVals `liftM` makeQuery getVals endpoint [] queryTweets
 
 {-
 getVals :: [BindingValue] -> Maybe (Double, Maybe Bool)
 
 {-
 
+WARNING: this code has not been updated to use named graphs yet as
+I have not worked out how best to merge the data given the rewrite
+rules we have.
+
 Usage:
 
   ./counttweets <endpoint> [mincounts]
 up the results. The output order is not guaranteed.
 -}
 query :: String -> IO [UserInfo]
-query endpoint = makeQuery toUI endpoint queryUserCount
+query endpoint = makeQuery toUI endpoint [] queryUserCount
 
 displayCount :: UserInfo -> IO ()
 displayCount UI {..} = 
 
 {-
 
+Not validated against the use of named graphs yet (since data now
+contains repeated triples).
+
 Usage:
 
   ./findretweets <endpoint> [simple|distance|unknown|complex]
 Aim:
 
 Identify possible re-tweets using simple heuristics -
-"does the text start with RT" - and output data in a Turtle
+e.g. "does the text start with RT" - and output data in a Turtle
 format.
 
 Interestingly, if you do not do the simple method and just
 import Swish.RDF.TurtleFormatter (formatGraphAsText)
 import Swish.RDF.Vocabulary.SIOC (siocreply_of)
 
-import Database.HaSparqlClient (Query, BindingValue(..))
+import Database.HaSparqlClient (Query, NamedGraph, BindingValue(..))
 
 import Control.Applicative ((<$>), (<*>))
 -- import qualified Control.Exception as CE
 import System.Exit (exitFailure)
 import System.IO (stderr, hPutStrLn)
 
-import SPARQL (FromBinding(..), makeQuery)
+import SPARQL (FromBinding(..), makeQuery, queryStore)
 
 import Text.EditDistance
 
 then do we want to identify the original tweet or the re-tweet by
 @userb? I suggest the original retweet.
 -}
-findMatchSimple :: String -> (URI, UTCTime, T.Text) -> IO (Maybe [RDFTriple])            
-findMatchSimple url (uri0, time0, txt0) = do
+findMatchSimple :: String -> [NamedGraph] -> (URI, UTCTime, T.Text) -> IO (Maybe [RDFTriple])            
+findMatchSimple url ngs (uri0, time0, txt0) = do
   
   -- let hdlError :: HttpException -> IO [a]
   --     hdlError _ = return []
       
   -- res <- flip CE.catch hdlError $ makeQuery getVals url (queryOriginalTweet uri0 time0 txt0)
-  res <- makeQuery getVals url (queryOriginalTweet uri0 time0 txt0)
+  res <- makeQuery getVals url ngs (queryOriginalTweet uri0 time0 txt0)
   
   {-
   unless (null res) $ do
 side of safety here. Relying on some later pass to pick up the extra
 matches.
 -}
-findMatchComplex :: String -> (URI, UTCTime, T.Text) -> IO (Maybe [RDFTriple])            
-findMatchComplex url (uri0, time0, txt0) = 
+findMatchComplex :: String -> [NamedGraph] -> (URI, UTCTime, T.Text) -> IO (Maybe [RDFTriple])            
+findMatchComplex url ngs (uri0, time0, txt0) = 
   case splitOnRT txt0 of
     Nothing -> return Nothing
     Just (_, uname, r) -> do
           -- is a little-bit more permissive than this, but should not cause a problem.
           getDiff = calcDiff (T.unpack (T.dropWhile (`elem` " :") r))
                 
-      res <- makeQuery getVals url qry
+      res <- makeQuery getVals url ngs qry
   
       let (uris, _, txts) = unzip3 res
           lastElem (_,_,a) = a
 calcDiff = 
   restrictedDamerauLevenshteinDistance defaultEditCosts
 
-findMatchDistance :: String -> (URI, UTCTime, T.Text) -> IO (Maybe [RDFTriple])            
-findMatchDistance url (uri0, time0, txt0) = do
+findMatchDistance :: String -> [NamedGraph] -> (URI, UTCTime, T.Text) -> IO (Maybe [RDFTriple])            
+findMatchDistance url ngs (uri0, time0, txt0) = do
   
   let (mtxt, qry) = queryDistanceTweet uri0 time0 txt0
-  res <- makeQuery getVals url qry
+  res <- makeQuery getVals url ngs qry
   
   let getDiff = calcDiff (T.unpack mtxt)
   
       
 findRetweetsSimple :: String -> IO RDFGraph
 findRetweetsSimple epoint = do
-  res <- makeQuery getVals epoint queryForRetweets
+  stores <- queryStore epoint
+  res <- makeQuery getVals epoint stores queryForRetweets
   hPutStrLn stderr $ "Found " ++ show (length res) ++ " tweets"
-  mms <- mapM (findMatchSimple epoint) res
+  mms <- mapM (findMatchSimple epoint stores) res
   return $ toRDFGraph $ concat $ catMaybes mms
 
 findRetweetsDistance :: String -> IO RDFGraph
 findRetweetsDistance epoint = do
-  res <- makeQuery getVals epoint queryForRetweets
+  stores <- queryStore epoint
+  res <- makeQuery getVals epoint stores queryForRetweets
   hPutStrLn stderr $ "Found " ++ show (length res) ++ " tweets"
-  mms <- mapM (findMatchDistance epoint) res
+  mms <- mapM (findMatchDistance epoint stores) res
   return $ toRDFGraph $ concat $ catMaybes mms
 
 findRetweetsUnknown :: String -> IO RDFGraph
 findRetweetsUnknown epoint = do
-  res <- makeQuery getVals epoint queryForRetweets
+  stores <- queryStore epoint
+  res <- makeQuery getVals epoint stores queryForRetweets
   hPutStrLn stderr $ "Found " ++ show (length res) ++ " tweets"
   let ms = map (\(u,_,_) -> toRDFTriple u isRetweet True) res
   return $ toRDFGraph ms
 
 findRetweetsComplex :: String -> IO RDFGraph
 findRetweetsComplex epoint = do
-  res <- makeQuery getVals epoint queryForRetweetsComplex
+  stores <- queryStore epoint
+  res <- makeQuery getVals epoint stores queryForRetweetsComplex
   hPutStrLn stderr $ "Found " ++ show (length res) ++ " tweets"
-  mms <- mapM (findMatchComplex epoint) res
+  mms <- mapM (findMatchComplex epoint stores) res
   return $ toRDFGraph $ concat $ catMaybes mms
 
 usage :: IO ()
 usage = do
   pName <- getProgName
-  hPutStrLn stderr $ "Usage: " ++ pName ++ " <endpoint> [simple|distance]"
+  hPutStrLn stderr $ "Usage: " ++ pName ++ " <endpoint> [simple|distance|unknown|complex]"
   exitFailure
   
 main :: IO ()
 {-
 
+WARNING: this code has not been updated to use named graphs yet as
+I have not worked out how best to merge the data given the rewrite
+rules we have.
+
 Usage:
 
-  ./gettweets <endpoint> [query]
+  ./gettweets <endpoint>
   ./gettweets <endpoint> query
 
 Aim:
 up the results.
 -}
 query :: String -> Query -> IO [(UTCTime, T.Text)]
-query = makeQuery getVals
+query epoint = makeQuery getVals epoint []
 
 getVals :: [BindingValue] -> Maybe (UTCTime, T.Text)
 getVals (timebv:textbv:[]) = 

GetUserConnections.hs

 
 {-
 
+WARNING: this code has not been updated to use named graphs yet as
+I have not worked out how best to merge the data given the rewrite
+rules we have.
+
 Usage:
 
   ./getuserconnections <infile1> .. <infileN>
 -}
 queries :: String -> IO [UserConn]
 queries endpoint = do
-  users <- makeQuery getUser endpoint queryUsers
-  followers <- makeQuery getFollowers endpoint queryNumFollowers
+  users <- makeQuery getUser endpoint [] queryUsers
+  followers <- makeQuery getFollowers endpoint [] queryNumFollowers
   
   let um1 = foldl' addUser HM.empty users
       um2 = foldl' updateFollowers um1 followers
       
-  conns <- makeQuery getConn endpoint queryConnections
+  conns <- makeQuery getConn endpoint [] queryConnections
   
   return $ map (toUserConn um2) conns
   
 defaultEndpoint :: String
 defaultEndpoint = "http://localhost:8080/sparql/"
 
-mkService :: String -> Query -> Service Query
-mkService epoint qry = Sparql epoint qry Nothing [] []
+mkService :: String -> Query -> Service
+mkService epoint qry = Sparql epoint qry [] [] []
 
 prefixes :: [Query]
 prefixes = [ "prefix sioc: <http://rdfs.org/sioc/ns#>"
 out :: String -> IO ()
 out = hPutStrLn stderr
 
-doQuery :: ToSPARQL a => Service a -> Method -> IO ()
+doQuery :: Service -> Method -> IO ()
 doQuery src m = do
   out $ "*** Running query: " ++ show m
   sel <- runSelectQuery src m
 import qualified Data.Text.IO as T
 import qualified Data.ByteString.Lazy as BL
 
-import Database.HaSparqlClient (Query, BindingValue(..))
+import Database.HaSparqlClient (Query, NamedGraph, BindingValue(..))
 
 import Network.HTTP.Conduit (simpleHttp)
 
 import Control.Applicative ((<$>), (<*>))
 import Control.Monad (unless, mzero)
 
-import SPARQL (makeQuery, queryStore)
+import SPARQL (makeQuery, queryStore, )
 import RDFUtils (triple)  
 
 {-
 
 -}
 
-queryReferents :: 
-  [String]   -- ^ list of named graphs to include (can be empty)
-  -> Query
-queryReferents ngs =
-  let from = unwords $ map (\n -> "FROM <" ++ n ++ ">") ngs
-  in unwords
-     [ "prefix sioc: <http://rdfs.org/sioc/ns#>"
-     , "prefix sioct: <http://rdfs.org/sioc/types#>"
-     , "prefix dcterms: <http://purl.org/dc/terms/>"
-     , "prefix foaf: <http://xmlns.com/foaf/0.1/>"
-     , "SELECT DISTINCT ?account "
-     ] ++ 
-     from ++
-     unwords [
-       " WHERE {"
-       , "  [] a sioct:MicroblogPost ; dcterms:references ?account ."
-       , "  OPTIONAL { ?account sioc:id ?id }"
-       , "  FILTER (!bound(?id))"
-       , "}"
-       ]
+queryReferents :: Query
+queryReferents =
+  unwords
+  [ "prefix sioc: <http://rdfs.org/sioc/ns#>"
+  , "prefix sioct: <http://rdfs.org/sioc/types#>"
+  , "prefix dcterms: <http://purl.org/dc/terms/>"
+  , "prefix foaf: <http://xmlns.com/foaf/0.1/>"
+  , "SELECT DISTINCT ?account WHERE { "
+  , "  [] a sioct:MicroblogPost ; dcterms:references ?account ."
+  , "  OPTIONAL { ?account sioc:id ?id }"
+  , "  FILTER (!bound(?id))"
+  , "}"
+  ]
 
-queryUsers :: 
-  [String]   -- ^ list of named graphs to include (can be empty)
-  -> Query
-queryUsers ngs =
-  let from = unwords $ map (\n -> "FROM <" ++ n ++ ">") ngs
-  in unwords
-     [ "prefix sioc: <http://rdfs.org/sioc/ns#>"
-     , "prefix sioct: <http://rdfs.org/sioc/types#>"
-     , "prefix dcterms: <http://purl.org/dc/terms/>"
-     , "prefix foaf: <http://xmlns.com/foaf/0.1/>"
-     , "SELECT ?user ?id ?handle ?foaf " 
-     ] ++
-     from ++
-     unwords [
-       " WHERE {"
-       , "  ?user a sioc:UserAccount ."
-       , "  OPTIONAL { ?user sioc:id ?id }"
-       , "  OPTIONAL { ?user sioc:name ?handle }"
-       , "  OPTIONAL { ?user foaf:name ?foaf }"
-       , "  FILTER (!bound(?id) || !bound(?handle) || !bound(?foaf))"
-       , "}"
-       ]
-  
+queryUsers :: Query
+queryUsers =
+  unwords
+  [ "prefix sioc: <http://rdfs.org/sioc/ns#>"
+  , "prefix sioct: <http://rdfs.org/sioc/types#>"
+  , "prefix dcterms: <http://purl.org/dc/terms/>"
+  , "prefix foaf: <http://xmlns.com/foaf/0.1/>"
+  , "SELECT ?user ?id ?handle ?foaf WHERE {" 
+  , "  ?user a sioc:UserAccount ."
+  , "  OPTIONAL { ?user sioc:id ?id }"
+  , "  OPTIONAL { ?user sioc:name ?handle }"
+  , "  OPTIONAL { ?user foaf:name ?foaf }"
+  , "  FILTER (!bound(?id) || !bound(?handle) || !bound(?foaf))"
+  , "}"
+  ]
   
 {-
 Run the query against the given endpoint, collecting
 up the results.
 -}
-queryRef :: String -> [String] -> IO [String]
-queryRef endpoint = makeQuery getURIFrag endpoint . queryReferents
+queryRef :: String -> [NamedGraph] -> IO [String]
+queryRef endpoint ngs = makeQuery getURIFrag endpoint ngs queryReferents
 
 -- handle from the Twitter URI
 -- and do we already know sioc:id, sioc:name, foaf:name?
 type UserInfo = (String, Bool, Bool, Bool)
 
-queryUsr :: String -> [String] -> IO [UserInfo]
-queryUsr endpoint = makeQuery getUI endpoint . queryUsers
+queryUsr :: String -> [NamedGraph] -> IO [UserInfo]
+queryUsr endpoint ngs = makeQuery getUI endpoint ngs queryUsers
 
 twURI :: String
 twURI = "http://twitter.com/"
 
 import qualified Data.Text as T
 
-import Database.HaSparqlClient (Service(Sparql), Query, BindingValue(..), Method(HGET), runSelectQuery)
+import Database.HaSparqlClient (Service(Sparql), Query, NamedGraph, BindingValue(..), Method(HGET), runSelectQuery)
 import Data.Maybe (mapMaybe)
 import Data.Time (UTCTime, ParseTime, parseTime)
 
 Run the query against the given endpoint, collecting
 up the results, applying the necessary transform.
 -}
-makeQuery :: ([BindingValue] -> Maybe a) -> String -> Query -> IO [a]
-makeQuery f endpoint query = do
-  let srv = Sparql endpoint query Nothing [] []
+makeQuery :: 
+  ([BindingValue] -> Maybe a)  -- ^ convert SPARQL match into output
+  -> String        -- ^ endpoint 
+  -> [NamedGraph]  -- ^ list of graphs to read into the default graph
+  -> Query         -- ^ query
+  -> IO [a]
+makeQuery f endpoint ngs query = do
+  let srv = Sparql endpoint query ngs [] []
   ans <- runSelectQuery srv HGET
   case ans of
     Left emsg -> hPutStrLn stderr ("ERROR: " ++ emsg) >>
 Find all the named graphs which contain tweet data
 within the endpoint. Returns a list of URIs.
 -}
-queryStore :: String -> IO [String]
+queryStore :: String -> IO [NamedGraph]
 queryStore endpoint = 
-  makeQuery getURI endpoint 
+  makeQuery getNG endpoint []
   "SELECT ?gr { ?gr a <http://purl.org/net/djburke/demo/twitter#TweetStore> . }"
 
-getURI :: [BindingValue] -> Maybe String
-getURI [URI u] = Just u
-getURI _       = Nothing
+getNG :: [BindingValue] -> Maybe NamedGraph
+getNG [URI u] = Just u
+getNG _       = Nothing
 
-
     base >= 3 && < 5,
     bytestring == 0.9.*,
     containers >= 0.3 && < 0.5,
-    hasparql-client == 0.2.*,
+    hasparql-client == 0.3.*,
     old-locale == 1.0.*,
     text == 0.11.*,
     time == 1.2.*,
     base >= 3 && < 5,
     bytestring == 0.9.*,
     containers >= 0.3 && < 0.5,
-    hasparql-client == 0.2.*,
+    hasparql-client == 0.3.*,
     http-conduit == 1.2.*,
     network >= 2.2 && < 2.4,
     old-locale == 1.0.*,
     aeson == 0.5.*,
     base >= 3 && < 5,
     bytestring == 0.9.*,
-    hasparql-client == 0.2.*,
+    hasparql-client == 0.3.*,
     old-locale == 1.0.*,
     text == 0.11.*,
     time == 1.2.*
   Build-Depends:
     base >= 3 && < 5,
     haskeline == 0.6.*,
-    hasparql-client == 0.2.*,
+    hasparql-client == 0.3.*,
     old-locale == 1.0.*,
     text == 0.11.*,
     time == 1.2.*
     aeson == 0.5.*,
     base >= 3 && < 5,
     bytestring == 0.9.*,
-    hasparql-client == 0.2.*,
+    hasparql-client == 0.3.*,
     old-locale == 1.0.*,
     statistics == 0.10.*,
     time == 1.2.*,
   Build-Depends:
     base >= 3 && < 5,
     haskeline == 0.6.*,
-    hasparql-client == 0.2.*,
+    hasparql-client == 0.3.*,
     text == 0.11.*
 
 Executable       findretweets
   Build-Depends:
     base >= 3 && < 5,
     edit-distance == 0.2.*,
-    hasparql-client == 0.2.*,
+    hasparql-client == 0.3.*,
     -- http-conduit == 1.2.*,
     network >= 2.2 && < 2.4,
     old-locale == 1.0.*,