# Source
#
# tw2buzz / tw2buzz.py

#############################################################################
#    Copyright 2010 Dhananjay Nene 
#    
#    Licensed under the Apache License, Version 2.0 (the "License"); 
#    you may not use this file except in compliance with the License. 
#    You may obtain a copy of the License at 
#        
#        http://www.apache.org/licenses/LICENSE-2.0 
#    
#    Unless required by applicable law or agreed to in writing, software 
#    distributed under the License is distributed on an "AS IS" BASIS, 
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#    See the License for the specific language governing permissions and 
#    limitations under the License.
############################################################################# 

import ConfigParser
import base64
import pprint
import simplejson
import sys
import time
import types
import urllib
import urllib2
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
import smtplib



# Options that main() requires in every "tw2buzz:" section of conf.ini
conf_param_names = ('twitter_handle','smtp_from_addr','smtp_password',) 
# Options that main() reads from, and writes back to, transient.ini
transient_param_names = ('last_tweet_id',)
# Marker searched for in tweets; fetch_tweets() removes it from the tweet text
keyword = "#bz"
# URL-quoted form of the marker, used when building the search query
kw = urllib.quote(keyword)

def abort(message):
    """
    Report a fatal error and terminate the program.

    The message is written to standard error followed by a newline, after
    which the process exits with status 1 (sys.exit raises SystemExit).
    """
    # Write to the stream directly rather than with the Python-2-only
    # "print >> stream" statement, so the same code runs on Python 2 and 3.
    sys.stderr.write('%s\n' % (message,))
    sys.exit(1)

class Data(object):
    """
    This is a generic data object. It is a completely dynamic object meant to 
    create itself from a dictionary or a set of nested dictionaries. All keys 
    in the dictionary are converted to object attributes allowing for an easier 
    and more intuitive access to the object attributes.

    Instances are built through the classmethods below, so a subclass yields
    instances of the subclass at every nesting level.
    """
    def __repr__(self):
        return "Generic Data Object:%s" % repr(self.__dict__)

    def __str__(self):
        return "Generic Data Object:%s" % str(self.__dict__)

    @classmethod
    def convert_dict(cls,val):
        """
        Recursively convert a decoded value.

        A dict becomes an instance whose attributes are the converted dict
        values; a list or tuple becomes a list of converted items; any other
        value is returned unchanged.
        """
        if isinstance(val, dict):
            newval = cls()
            for nkey, nval in val.items():
                newval.__dict__[nkey] = cls.convert_dict(nval)
            return newval
        if isinstance(val, (list, tuple)):
            return [cls.convert_dict(item) for item in val]
        return val

    @classmethod
    def load_from_json(cls,json):
        """
        Build an instance from ``json``, a decoded JSON object (any object
        providing ``items()``); each value is converted with convert_dict.
        """
        data = cls()
        for key, val in json.items():
            data.__dict__[key] = cls.convert_dict(val)
        return data

def http_to_json(url):
    """
    This method invokes the remote URL which is expected to respond with a JSON datastream.

    A JSON object is returned as a single Data instance whose attributes are the
    JSON keys; a JSON array is returned as a list holding one Data instance per
    element.

    Raises urllib2.URLError if the fetch fails (the error is printed first) and
    ValueError if the response body is not a JSON object or array.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            stream = response.read()
        finally:
            # Release the connection even when reading the body fails
            response.close()
    except urllib2.URLError as e:
        print('Received Error while fetching twitter record for url : %s = %s' % (url, e))
        # A bare raise keeps the original traceback
        raise

    # Decide on the decoded type instead of peeking at the first character:
    # this copes with an empty body, leading whitespace and empty containers,
    # all of which previously ended in an IndexError or an unbound local.
    jsonobj = simplejson.loads(stream)
    if isinstance(jsonobj, list):
        return [Data.load_from_json(obj) for obj in jsonobj]
    if isinstance(jsonobj, dict):
        return Data.load_from_json(jsonobj)
    raise ValueError('Response from %s is not a JSON object or array' % url)

def fetch_tweets(section_details, last_tweet_id):
    """
    Search for tweets from the configured handle that carry the keyword.

    section_details -- configuration dictionary; its 'twitter_handle' entry
                       names the account to search
    last_tweet_id   -- passed to the search as since_id

    Returns the list of result objects with the keyword removed from each
    text, in the reverse of the order the service returned them. An empty
    list is returned (and a note written to standard error) when the response
    carries no 'results' entry.
    """
    # NOTE(review): the legacy search API is believed to cap rpp at 100 - confirm
    rpp = 200
    page = 1

    # Quote the handle so an unexpected character in the configuration cannot
    # corrupt the query string
    handle = urllib.quote(section_details['twitter_handle'])
    search_query = "http://search.twitter.com/search.json?q=+%s+from:%s&rpp=%s&since_id=%s&page=%s"%(kw,handle,rpp,last_tweet_id,page)
    data = http_to_json(search_query)

    # A response without 'results' (e.g. an error payload) used to end in an
    # AttributeError; treat it as "nothing to forward" instead
    results = getattr(data, 'results', None)
    if results is None:
        sys.stderr.write('No results in search response for %s: %s\n'
                         % (section_details['twitter_handle'], data))
        return []

    for result in results :
        result.text = result.text.replace(keyword,'')
    results.reverse()
    return results

def get_config(inifile, section_prefix, conf_param_names, abort_on_not_found):
    """
    Read selected options from the sections of an ini file.

    inifile            -- path of the ini file to read
    section_prefix     -- only sections whose name starts with this prefix are
                          used; a false value selects every section
    conf_param_names   -- option names to collect from each selected section
    abort_on_not_found -- when true, a missing option or the absence of any
                          selected section ends the program through abort();
                          when false, a missing option is stored as None

    Returns a dictionary mapping each selected section name to a dictionary
    of option name -> value.
    """
    ini = ConfigParser.ConfigParser()
    ini.read(inifile)

    if section_prefix :
        selected = [name for name in ini.sections() if name.startswith(section_prefix)]
    else :
        selected = ini.sections()

    result = {}
    for section in selected :
        values = {}
        for name in conf_param_names :
            if ini.has_option(section, name) :
                values[name] = ini.get(section, name)
                continue
            if abort_on_not_found :
                abort('Error: Parameter %s not specified for section %s' % (name,section))
            values[name] = None
        result[section] = values

    if abort_on_not_found and not selected :
        abort('Either config file %s or section with prefix %s in it not found' % (inifile,section_prefix))

    return result

def save_transient_data(dct_):
    """
    Write the given data to transient.ini, replacing the file's contents.

    dct_ maps section names to dictionaries of option name -> value; each
    outer key becomes an ini section and each inner pair an option in it.
    """
    writer = ConfigParser.RawConfigParser()
    for section in dct_ :
        writer.add_section(section)
        for option, value in dct_[section].items() :
            writer.set(section, option, value)

    with open('transient.ini','wb') as out :
        writer.write(out)
        
def mail_to_buzz(section_details,tweet):
    """
    Mail a tweet to buzz@gmail.com through Gmail's SMTP server.

    section_details -- configuration dictionary providing 'smtp_from_addr'
                       (sender and login name) and 'smtp_password'
    tweet           -- object whose 'text' becomes the message subject

    Returns tweet.id once the message has been handed to the server. Errors
    raised by smtplib propagate to the caller; the connection is closed in
    either case.
    """
    msg = MIMEMultipart()
    msg['Subject'] = tweet.text
    msg['From'] = section_details['smtp_from_addr']
    msg['To'] = 'buzz@gmail.com'
    server = smtplib.SMTP('smtp.gmail.com:587')
    try:
        server.starttls()
        server.login(section_details['smtp_from_addr'],section_details['smtp_password'])
        server.sendmail(section_details['smtp_from_addr'], 'buzz@gmail.com', msg.as_string())
        server.quit()
    finally:
        # Make sure the socket is released when any step above fails;
        # close() is harmless after a successful quit()
        server.close()
    return tweet.id

        
def main():
    """
    Forward new keyword-tagged tweets for every configured handle.

    Reads the "tw2buzz:" sections of conf.ini and the last forwarded tweet ids
    from transient.ini, mails each newly found tweet, and writes the updated
    ids back to transient.ini. The ids are written even when a fetch or send
    fails part-way, so tweets already forwarded are not sent again on the
    next run.
    """
    # Get static configuration parameters
    conf_dict = get_config('conf.ini','tw2buzz:', conf_param_names, True)
    # Get the last read tweet ids
    tracking_dict = get_config('transient.ini',None,transient_param_names, False)
    try:
        # For each tw2buzz section ie. each twitter handle
        for section_name, section_details in conf_dict.items() :
            transient_section = tracking_dict.setdefault(section_name, {})
            # A section that is new, or that lacks the option (stored as
            # None by get_config), starts from id 0
            last_tweet_id = int(transient_section.get('last_tweet_id') or 0)
            transient_section['last_tweet_id'] = last_tweet_id
            for tweet in fetch_tweets(section_details,last_tweet_id) :
                tweet_id = mail_to_buzz(section_details,tweet)
                if tweet_id :
                    # Record progress immediately so a later failure does
                    # not lose it
                    transient_section['last_tweet_id'] = tweet_id
    finally:
        save_transient_data(tracking_dict)
    
# Run the forwarding job only when executed as a script, not when imported
if __name__ == '__main__':
    main()