# Source: grabtweets / grabtweets.py

#!/usr/bin/env python

from __future__ import print_function
import os, sys, json
from optparse import OptionParser

# Absolute path of the directory that contains this script.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
# The 'python_twitter' directory next to this script is appended to the module
# search path; this has to happen before the import that follows.
# NOTE(review): presumably this lets modules located inside that directory be
# imported by their top-level names -- confirm against the bundled package.
PYTHON_TWITTER_PATH = os.path.join(BASE_DIR, 'python_twitter')
sys.path.append(PYTHON_TWITTER_PATH)

from python_twitter import twitter


def _load_file(filename):
    """Return the decoded JSON content of *filename*.

    A path that does not exist yields an empty dict instead of an error, so
    a first run can start from an empty archive.
    """
    if os.path.exists(filename):
        with open(filename, 'r') as source:
            archive = json.load(source)
        return archive
    return {}

def _status_fields(status):
    """Return the tuple of attributes that is stored for one status."""
    return (
        status.created_at,
        status.in_reply_to_screen_name,
        status.in_reply_to_user_id,
        status.in_reply_to_status_id,
        status.source,
        status.text,
    )


def _merge_statuses(tweets, statuses):
    """Store every status of *statuses* in *tweets*, keyed by its id as a string."""
    # JSON object keys are always strings, so tweets loaded from the file are
    # keyed by strings.  Using string keys for new entries as well keeps one
    # entry per tweet instead of an int key next to an equal string key.
    for status in statuses:
        tweets[str(status.id)] = _status_fields(status)


def _get_more_tweets(username, existing_tweets):
    """Fetch more tweets of *username* and merge them into *existing_tweets*.

    Two timeline requests of up to 200 statuses each are made: one bounded
    by ``max_id`` set to the smallest stored id (older tweets) and one with
    ``since_id`` set to the largest stored id (newer tweets).  With an empty
    archive the bounds are ``max_id=None`` and ``since_id=0``.

    username        -- account name passed to ``GetUserTimeline``.
    existing_tweets -- dict mapping tweet id to the stored field tuple/list;
                       it is updated in place.

    Returns the same *existing_tweets* dict.  Errors raised by the twitter
    API calls are not handled here.
    """
    api = twitter.Api()

    # Compare ids numerically: the keys may be strings (after a JSON round
    # trip), and as strings "99" would sort after "100".
    known_ids = [int(tid) for tid in existing_tweets]
    last_id = max(known_ids) if known_ids else 0
    first_id = min(known_ids) if known_ids else None

    # Older tweets, up to and including the oldest one already stored.
    statuses = api.GetUserTimeline(username, max_id=first_id, count=200)
    _merge_statuses(existing_tweets, statuses)

    # Tweets posted after the newest one already stored.
    statuses = api.GetUserTimeline(username, since_id=last_id, count=200)
    _merge_statuses(existing_tweets, statuses)

    return existing_tweets

def _dump_to_file(filename, tweets):
    """Write *tweets* to *filename* as JSON, replacing any previous content.

    The data is serialized before the file is opened.  Opening with mode
    'w' truncates the file immediately, so serializing first means that an
    object which cannot be encoded raises without touching an existing file.

    filename -- path of the file to (over)write.
    tweets   -- JSON-serializable object to store.

    Raises whatever ``json.dumps`` raises for unserializable data and
    whatever ``open``/``write`` raise for I/O problems.
    """
    serialized = json.dumps(tweets)
    with open(filename, 'w') as f:
        f.write(serialized)
    

def _build_parser():
    """Create the OptionParser that defines the command-line options.

    Two options are registered: ``-u/--username`` (stored as ``username``,
    default None) and the flag ``-p/--print`` (stored as ``print_tweets``,
    default False).
    """
    cli = OptionParser(usage="Usage: %prog -u USERNAME FILE")

    # (flags, keyword settings) for every option, in registration order.
    option_specs = (
        (("-u", "--username"),
         dict(dest="username", default=None, metavar="USERNAME",
              help="grab (new) tweets from USERNAME")),
        (("-p", "--print"),
         dict(dest="print_tweets", action="store_true", default=False,
              metavar="FILE", help="print the tweets in FILE")),
    )
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    return cli

def _main():
    """Entry point of the command-line tool.

    Parses the command line, loads the tweets stored in FILE and then either
    prints them as tab-separated rows (``-p``) or fetches more tweets for
    USERNAME and writes the merged result back to FILE.
    """
    parser = _build_parser()
    options, positional = parser.parse_args()

    # Exactly one positional argument (the FILE) is accepted.
    if len(positional) != 1:
        parser.error('You must specify a FILE!')
    path = positional[0]

    tweets = _load_file(path)

    if not options.print_tweets:
        # Fetch mode: a username is mandatory.
        if not options.username:
            parser.error('You must specify a USERNAME!')
        updated = _get_more_tweets(options.username, tweets)
        _dump_to_file(path, updated)
        return

    # Print mode.  Each stored value is indexed as:
    #   0 created_at, 1 in_reply_to_screen_name, 2 in_reply_to_user_id,
    #   3 in_reply_to_status_id, 4 source, 5 text
    columns = ('id', 'created_at', 'in_reply_to_screen_name',
               'in_reply_to_user_id', 'in_reply_to_status_id', 'source',
               'text')
    print(*columns, sep='\t')

    # Order the rows by numeric tweet id.
    rows = sorted((int(key), tweets[key]) for key in tweets)
    for tid, fields in rows:
        created_at = fields[0].encode('utf-8')
        text = fields[5].encode('unicode_escape')
        print(tid, created_at, fields[1], fields[2], fields[3], fields[4],
              text, sep='\t')


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    _main()