1. Doug Burke
  2. astrosearch

Commits

Doug Burke  committed 768431d

ToRDF: experimental support for chunking data access (incomplete, does not reduce mem use, commented out)

  • Participants
  • Parent commits efa955e
  • Branches default

Comments (0)

Files changed (1)

File ToRDF.hs

View file
  • Ignore whitespace
   -> IO [TweetData]
 getTweets c ms mn mt = do
   acid <- openStore c
-  twts <- case (ms,mn) of
-            (Just s, Just n) -> query acid (GetSubsetTweetEvents s n)
-            (_, Just n) -> query acid (GetTweetEvents n)
-            _ -> query acid GetAllTweetEvents
+  twts <- getTweetData acid ms mn
   closeAcidState acid
   let ans1 = mapMaybe toTweetData . rights . map toTweet' $ twts
       ans2 = case mt of
 
   return ans2
 
+-- | Number of tweets requested per query when tweet retrieval is
+--   split into chunks (see 'subsetTweetData').
+chunkSize :: Int
+chunkSize = 1000
+
+-- | Retrieve a contiguous range of tweets from the store, issuing
+--   the queries in chunks of at most 'chunkSize' tweets.
+--
+--   The requested range is clipped against the number of tweets
+--   reported by the store; if nothing is left after clipping, no
+--   subset query is made and the result is the empty list.
+--
+--   TODO: is there a problem when the number of tweets
+--         changes during the run?
+--
+subsetTweetData ::
+  AcidState TweetStore
+  -> Int     -- ^ start tweet number (0 is the first tweet)
+  -> Int     -- ^ number of tweets to retrieve
+  -> IO [T.Text]
+subsetTweetData acid s n = do
+  nAll <- query acid GetNumberEvents
+  let iStart = max 0 s
+      nEnd = min nAll (s + n)
+
+      -- Clamp to zero: a start beyond the end of the store (or a
+      -- non-positive count) would otherwise give a negative request,
+      -- which divMod turns into a negative loop count and a positive
+      -- remainder, i.e. a bogus final query.
+      nReq = max 0 (nEnd - iStart)
+      (nLoop, nLeft) = nReq `divMod` chunkSize
+
+      fetch start len = query acid $ GetSubsetTweetEvents start len
+      go l = fetch (l * chunkSize + iStart) chunkSize
+
+  ts <- mapM go [0 .. nLoop - 1]
+  -- Only ask for the trailing partial chunk when there is one.
+  te <- if nLeft > 0
+        then fetch (nLoop * chunkSize + iStart) nLeft
+        else return []
+  return $ concat ts ++ te
+  
+-- | Read tweets from the store, selected by an optional start
+--   position and an optional count.
+--
+--   * start and count both given: that sub-range, via
+--     'GetSubsetTweetEvents'
+--
+--   * a count but no start: 'GetTweetEvents' with that count
+--     (presumably the most recent tweets, going by the disabled
+--     variant below - TODO confirm)
+--
+--   * no count: every tweet, via 'GetAllTweetEvents'; a start given
+--     without a count is ignored
+--
+getTweetData ::
+  AcidState TweetStore
+  -> Maybe Int     -- ^ start tweet number
+  -> Maybe Int     -- ^ number of tweets
+  -> IO [T.Text]
+{- Disabled: experimental chunked variant built on 'subsetTweetData'.
+
+getTweetData acid (Just s) (Just n) = subsetTweetData acid s n
+getTweetData acid _ (Just n) = do
+  nAll <- query acid GetNumberEvents
+  let iStart = max 0 (nAll-n)
+  subsetTweetData acid iStart n
+getTweetData acid _ _ = do
+  nAll <- query acid GetNumberEvents
+  subsetTweetData acid 0 nAll
+-}
+getTweetData acid (Just s) (Just n) = query acid (GetSubsetTweetEvents s n)
+getTweetData acid _ (Just n) = query acid (GetTweetEvents n)
+getTweetData acid _ _ = query acid GetAllTweetEvents
+
 -- | Create a graph containing metadata about the search
 createMetadata ::
   TweetConnection