Source

eggDiscover / followerslookup / fetching.py

Full commit
"""
Fetching is a set of utilities to query information about a user's followers.
You will need an authenticated Twitter account, as provided by
authentication.py, and the name of a Twitter account.

The authenticated account is used to query the Twitter REST API.
The Twitter account is the account against which the queries are made.
This set of utilities can be used as follows:

>>> # first get an authenticated account as explained in authentication.py
>>> # then request the list of followers for a user :
>>> followers_ids = get_followers_ids(account, "NicolasSarkozy")
>>> # then get extended information about these users:
>>> followers_infos = get_followers_infos(account, followers_ids)
>>> # you are then free to use the returned list as you see fit.
>>> # you can save it to a database, for example:
>>> save_users("NicolasSarkozy", followers_infos)
"""
from twitter.api import TwitterHTTPError
from django.core.exceptions import ObjectDoesNotExist
from models import User, TwitterUser
import datetime

def get_followers_ids(account, user):
    """
    Return the ids of all the followers of a twitter account.

    The followers are read page by page, following the "next_cursor" value
    of each response until it is 0. Each request on the twitter API will
    return 5000 ids. As we are limited to 350 requests per hour, we can get
    1,750,000 ids per hour.

    params account : account is an authenticated Twitter account, used to
    query the API.

    params user : user is a string. It represents a twitter account,
    for example "NicolasSarkozy". It must be a valid twitter account.

    returns : the list of follower ids, or a status string
    ("Rate limit reached" on the first request, "Rate Limit reached" on the
    following ones) when twitter answers "Rate limit exceeded". In that case
    the ids collected so far are dropped and the operation must be started
    again later.

    Any other TwitterHTTPError is treated as a temporary failure: the same
    page is requested again, immediately and without any retry limit.
    """
    followers_ids = []
    # Placeholder used when the very first request fails: the cursor -1 makes
    # the loop below ask for the first page again. Without it a failed first
    # request left twitter_response undefined.
    twitter_response = {"next_cursor": -1}
    try:
        twitter_response = account.followers.ids(screen_name=user)
        followers_ids.extend(twitter_response['ids'])
    except TwitterHTTPError as e:
        # twitter is over capacity or we are rate limited
        if "Rate limit exceeded" in e.response_data:
            # operation cannot be continued this hour. Wait next hour
            return "Rate limit reached"

    while twitter_response["next_cursor"] != 0:
        try:
            twitter_response = account.followers.ids(
                screen_name=user, cursor=twitter_response["next_cursor"])
            followers_ids.extend(twitter_response['ids'])
        except TwitterHTTPError as e:
            if "Rate limit exceeded" in e.response_data:
                # operation cannot be continued this hour. Wait next hour
                return "Rate Limit reached"
            # twitter is over capacity: twitter_response still holds the
            # previous page, so the next iteration retries the same cursor.
    return followers_ids


def get_followers_infos(account, followers_ids, user):
    """
    Request twitter for extended informations on a list of followers and save
    them in the database.

    The ids are sent to the users/lookup endpoint in batches of 100, and each
    response is handed to save_users as soon as it is received. Each request
    on the twitter API will return 100 users. As we are limited to 350
    requests per hour, we can get 35,000 users per hour.

    Fetching resumes where a previous run stopped: the first
    TwitterUser.lastindex entries of followers_ids are skipped.

    params account : an authenticated Twitter account, used to query the API.

    params followers_ids : the list of follower ids, as returned by
    get_followers_ids.

    params user : the name of the TwitterUser row whose followers are
    fetched. The row is read with TwitterUser.objects.get, so it presumably
    has to exist already -- TODO confirm against the caller.

    returns : "Fetch ended" once every batch, including the last partial one,
    has been saved, or "Rate Limit Reached" when twitter answers
    "Rate limit exceeded".

    Any other TwitterHTTPError is treated as a temporary failure: the same
    batch is requested again, immediately and without any retry limit.

    TODO : do not lookup user already in the database.
    """
    batch_size = 100
    twitter_user = TwitterUser.objects.get(name=user)
    unfetched_users = followers_ids[twitter_user.lastindex:]

    # Walk the list by start offset so that the final batch is requested too,
    # even when it holds fewer than batch_size ids.
    offset = 0
    while offset < len(unfetched_users):
        start = datetime.datetime.now()
        batch = unfetched_users[offset:offset + batch_size]
        users_ids = ','.join(str(follower_id) for follower_id in batch)
        try:
            twitter_response = account.users.lookup(user_id=users_ids, method="POST")
        except TwitterHTTPError as e:
            # twitter is over capacity or we are rate limited
            if "Rate limit exceeded" in e.response_data:
                # operation cannot be continued this hour. Wait next hour
                return "Rate Limit Reached"
            # twitter is over capacity: offset is unchanged, so the same
            # batch is requested again.
            continue
        save_users(twitter_user, twitter_response)
        # time spent on this batch (request + save)
        print(datetime.datetime.now() - start)
        offset += batch_size

    return "Fetch ended"

def save_users(twitter_user,users):
    """
    Save a batch of followers in the database.

    users is an iterable of extended user informations, each one describing
    a follower of the twitter account given as twitter_user. Every entry is
    passed to save_user together with twitter_user, in the order received.
    Nothing is returned.
    """
    for follower_infos in users:
        save_user(twitter_user, follower_infos)


def save_user(twitter_user, user):
    """
    First we will check if the user is already in the database.
    If the user is in the database, we return this user.

    else we parse the user for usefull informations, save him in the database.

    finaly, we return the parsed user.    
    """
    try:
        
        usr = User.objects.get(pk = user['id'], twitteruser__name = twitter_user)
        return usr
    
    except ObjectDoesNotExist:
        #well user doesn't exist 
        #so we parse the user informations and
        #we save it in the databese
        usr = User()

    for k, v in user.items():
        #The only particular field is the created_at field. We must
        #format the date
        if k == "created_at":
            v = datetime.datetime.strptime(v,'%a %b %d %H:%M:%S +0000 %Y')
        if v == 'None':
            pass
        else:
            setattr(usr,k,v)
        
        if not usr.notifications:
            #Notifications are True or False but Twitter doesn't
            #return the field if it is false
            usr.notifications = False
    # set the twitter_user to the twitter_account we are looking
    # we use an add cause a user can follow more than one user ;)
    twitter_user, created = TwitterUser.objects.get_or_create(name = twitter_user)
    usr.save()
    twitter_user.followers.add(usr)
    twitter_user.lastindex += 1
    twitter_user.save()
    return usr

def cleanup(followers_ids,twitter_user):
    """
    cleanup looks for users who no longer follow a TwitterUser, removes them
    from its followers and deletes from the database those who are not
    linked to any TwitterUser anymore.

    params followers_ids : the ids of the current followers of the account,
    as returned by get_followers_ids.

    params twitter_user : the TwitterUser instance to clean up.

    Every User of the database whose pk is not in followers_ids is removed
    from twitter_user.followers, then deleted if its twitteruser_set is
    empty.

    raises TypeError when followers_ids is a string, such as the status
    message returned by get_followers_ids on a rate limit: treating it as an
    empty follower list would remove every user.
    """
    if isinstance(followers_ids, str):
        raise TypeError(
            "followers_ids must be a collection of ids, got %r" % (followers_ids,))

    # Built once so that each membership test is not a scan of the whole list.
    current_ids = set(followers_ids)

    for row in User.objects.values('pk'):
        if row['pk'] in current_ids:
            continue
        user = User.objects.get(pk=row['pk'])
        # Same relation as the one filled by save_user.
        twitter_user.followers.remove(user)
        twitter_user.save()
        if len(user.twitteruser_set.all()) == 0:
            user.delete()