Commits

Anonymous committed 01acf34

added cleanup for fetch

  • Participants
  • Parent commits 3989b6c

Comments (0)

Files changed (4)

File followerslookup/fetching.py

             else:
                 pass
 
-    # count = 100
-    # users = []
-    # twitter_user = user
-    # # first call to the twitter API
-    # start = datetime.datetime.now()
-    # twitter_response = account.users.lookup(user_id = ','.join(str(id) for id in unfetched_users[:count]))
-    # print datetime.datetime.now() - start
-    # start = datetime.datetime.now()
-    # # we fill the user list with the response
-    # [users.append(user) for user in twitter_response]  
-    # print users
-    # # we save the users 
-    # #save_users(twitter_user,users)
-    # # we unset users list
-    # users = [] 
-    # while count < len(unfetched_users):
-    #     print count, datetime.datetime.now() - start
-    #     start = datetime.datetime.now()
-    #     # we request info from twitter as long as the length of the response is equal to 100.
-    #     try:
-    #         users = account.users.lookup(user_id = ','.join(str(id) for id in unfetched_users[count:count+100]))
-    #         count += 100
-    #         # we save each user in the database
-    #         #[save_user(twitter_user,user) for user in users]
-    #         # we fill the user_list with the response.
-    #         [users.append(user) for user in users]
-    #         print users
-    #         #save_users(twitter_user,users)
-    #         users = []
-    #     except TwitterHTTPError, e:
-    #         # twitter is over capacited or we are rate limited
-    #         if "Rate limit exceeded" in e.response_data:
-    #             raise # operation cannot be continued this hour. Wait next hour
-    #         # twitter is overcapacited, go next
-            
-    # return users
-    return
+    return "Fetch ended"
 
 def save_users(twitter_user,users):
     """
     twitter_user.save()
     return usr
 
+def cleanup(followers_ids,twitter_user):
+    """
+    Detach stored users that no longer follow ``twitter_user`` and delete
+    the ones that are left without any TwitterUser.
+
+    followers_ids -- collection of ids currently following ``twitter_user``;
+                     every stored User primary key is tested against it
+                     with ``in``
+    twitter_user  -- object the stale followers are removed from; it is
+                     saved after each removal
+    """
+
+    for entry in User.objects.values('pk'):
+        pk = entry['pk']
+        if pk in followers_ids:
+            # still a follower, nothing to clean
+            continue
+        user = User.objects.get(pk=pk)
+        # NOTE(review): relies on twitter_user exposing remove(); confirm
+        # against the TwitterUser model definition.
+        twitter_user.remove(user)
+        # Was ``twitter_user.sav()``, a misspelling of save().
+        twitter_user.save()
+        # Only drop the User row once no TwitterUser references it anymore.
+        if len(user.twitteruser_set.all()) == 0:
+            user.delete()

File followerslookup/tasks.py

+"""
+These applications are divided into two main categories:
+
+- Twitter management
+- Csv generation
+
+The Twitter management tasks are:
+
+- fetch
+
+Fetch retrieves all Twitter users following a
+"TwitterUser". Each time a user is totally fetched, we reset the followers
+count and update the followers list again the next hour.
+
+- cleanup
+
+Cleanup looks for Twitter users no longer following a TwitterUser and
+deletes them from the database.
+
+"""
+
 from celery.schedules import crontab
 from celery.decorators import task
 from celery.decorators import periodic_task
 from social_auth.models import UserSocialAuth
 from followerslookup.authentication import get_account
-from followerslookup.fetching import get_followers_infos,get_followers_ids
+from followerslookup.fetching import get_followers_infos,get_followers_ids,cleanup
 from followerslookup.models import TwitterUser, User
 import csv,codecs
 try:
 
 @task()
 def fetch():
+    """
+    Fetch all TwitterUser objects to get their followers.
+    If we are rate limited, we return "rate limit reached".
+    If the fetch is finished, we return "Fetch ended"
+    and we reset the last index to parse for new followers next time.
+    """
     user = UserSocialAuth.objects.all()[0]
     account = get_account(user)
-    for user in TwitterUser.objects.all():
+    for user in TwitterUser.objects.all().order_by("-lastindex"):
         followers_ids = get_followers_ids(account, user.name)
         followers_infos = get_followers_infos(account, followers_ids,user.name )
-    return 'Fetch ended'
+        if followers_infos == "Fetch ended":
+            user.lastindex = 0
+            user.save()
+    return followers_infos
+
+@task()
+def followers_cleanup():
+    """
+    Clean the followers of every TwitterUser and destroy the users
+    that do not follow the TwitterUser anymore.
+
+    The Twitter account is built from the first UserSocialAuth row.
+    Returns the string "<name> cleaned", where <name> comes from the last
+    object bound to ``user`` (the last TwitterUser iterated).
+    """
+    user = UserSocialAuth.objects.all()[0]
+    account = get_account(user)
+    for user in TwitterUser.objects.all().order_by("-lastindex"):
+        followers_ids = get_followers_ids(account, user.name)
+        # Was ``launch_cleanup(followers_ids)``: no such name is defined or
+        # imported here. The helper imported from followerslookup.fetching
+        # is ``cleanup`` and it also needs the TwitterUser being cleaned.
+        cleanup(followers_ids, user)
+    return "%s cleaned"%user.name
 
 @task()
 def convert_context_to_csv(context, csv_file = StringIO()):

File followerslookup/urls.py

 from django.conf.urls.defaults import patterns, url
-from views import CsvGenericListView,TwitterUserListView#,GenericListView
+from views import CsvGenericListView,TwitterUserDetailView#,GenericListView
 from django.views.generic import TemplateView
 from models import TwitterUser, User
 
         CsvGenericListView.as_view(model = User,paginate_by=100)),
     url(r'^lookup/$',CsvGenericListView.as_view(model = User,paginate_by=100)),
 
-    url(r'^test/(?P<pk>\d+)/$',TwitterUserListView.as_view(model=TwitterUser)),
+    url(r'^test/(?P<pk>\d+)/$',TwitterUserDetailView.as_view(model=TwitterUser)),
     )
 

File followerslookup/views.py

 import os
 from tasks import convert_context_to_csv
 
-class TwitterUserListView(DetailView):
+class TwitterUserDetailView(DetailView):
     
     def get_context_data(self,**kwargs):
-        context = super(TwitterUserListView,self).get_context_data(**kwargs)
+        context = super(TwitterUserDetailView,self).get_context_data(**kwargs)
         avg = GenericListViewMixin()
         avg.kwargs, avg.request = self.kwargs, self.request
+        avg.kwargs['user'] = TwitterUser.objects.get(pk = self.kwargs['pk']).name
         # count number of users
         extra_queryset = avg.get_queryset()
         context['count'] = extra_queryset.count()
         # count users without picture
-        context['nopicture'] = extra_queryset.filter(profile_image_url__contains = "default_profile").count()
-        context['notweet'] = extra_queryset.filter(statuses_count = 0).count()
+        context['nopicture'] = "%i %%"%(
+            (float(
+                extra_queryset.filter(
+                    profile_image_url__contains = "default_profile"
+                    ).count()
+                ) / float(context['count'])
+                ) * 100
+            )
+        context['notweet'] ="%i %%"%(
+            float(
+                extra_queryset.filter(statuses_count = 0).count() / float(context['count'])
+                                                                            )
+                                                                            *100)
         return context
 
 class GenericListViewMixin(object):
             if not os.path.exists(base_path):
                      os.makedirs(base_path)
             
+
             convert_context_to_csv.delay(context,csv_file = path)
             return HttpResponse(
         """