Commits

Anonymous committed 0f8eec6

initial checkin

Comments (0)

Files changed (9)

+syntax: glob
+*.pyc
+dist
+MANIFEST
+_build
+_static
+.DS_Store
+Requirements
+------------
+Google App Engine
+Tweepy http://github.com/joshthecoder/tweepy
+
+------------------
+Install
+------------------
+
+Open up the app.py file and fill out the settings at the top of the file.
+
+Open up app.yaml and fill in your application name and version.
+
+Download tweepy from http://github.com/joshthecoder/tweepy and put the tweepy directory at the same
+level as app.py.
+
+Copyright (c) 2010, Ken Cochrane
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of the author nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+========================
+GAE-Twitter-Bot
+========================
+
+Google App Engine Twitter Bot takes feeds from Yahoo Pipes, parses them, checks for duplicates and posts them to a Twitter account in a Twitter-friendly manner (throttled, so that we don't overload Twitter).
+
+It also shortens the URLs it gets using a few different URL shortening services: bit.ly, u.nu, and is.gd.
+
+Once set up, the whole thing runs automatically and is very hands-off.
+
+I built this as a way to get familiar with Google App Engine and the Twitter API, and with the newer Google App Engine features: cron jobs and task queues.
+
+I am now releasing it to all, so that everyone can see what I did, and learn from it and improve it. Enjoy, Ken Cochrane
+#!/usr/bin/env python
+
+# --------- Imports --------
+import cgi, urllib
+from google.appengine.ext import webapp
+from google.appengine.ext.webapp.util import run_wsgi_app
+from google.appengine.ext import db
+from google.appengine.api import urlfetch
+from google.appengine.api.labs import taskqueue
+from md5 import md5
+from django.utils import simplejson as json
+import tweepy
+from tweepy.error import TweepError
+import logging
+from random import choice
+
+# --------- Settings ---------
+# Your twitter username
+TWITTER_ID = '' 
+
+# Your twitter password
+TWITTER_PWD = '' 
+
+# Your Yahoo Pipe URLs.
+# THEY NEED TO BE RETURNED AS JSON
+# This is a tuple, so feel free to put more than one: ('url1','url2') etc.
+# A single url still needs the trailing comma, ('url1',), otherwise it is
+# just a string and the cron job would loop over its characters.
+YAHOO_PIPE_URL = () 
+
+# If you don't want to post to twitter put False.
+# This is handy when you are testing things out locally.
+TWITTER_ENABLED = True
+
+# bit.ly login (bit.ly is only used when both login and api key are set)
+BITLY_LOGIN = ""
+
+# bit.ly api key
+BITLY_API_KEY = ""
+
+# how many times to retry shortening a url before giving up
+SHORT_URL_NUM_RETRIES = 5
+# --------- END Settings ---------
+
+
+# ------- Database Handling -----
+class PostDB(db.Model):
+    """ Datastore model for one feed entry that is (or will be) posted to twitter. """
+    # md5 hex digest of "<title>-<link>"; queried to spot entries we already have
+    md5_hash = db.StringProperty(required=True)
+    # the text that gets sent to twitter as the status update
+    status = db.StringProperty(multiline=True)
+    # True when posting raised a TweepError; FixerCron re-queues these
+    failed = db.BooleanProperty()
+
+# ------ URL Shortening Tool -----
+def bitly(url):
+    """ call bitly's url shortening service and shorten the given url """
+    encoded_url = urllib.urlencode({'longUrl': url})
+    full_link = "http://api.bit.ly/v3/shorten?format=txt&apikey=%s&login=%s&%s" % (BITLY_API_KEY,BITLY_LOGIN,encoded_url)
+    result = urlfetch.fetch(full_link,deadline=5)
+    if result.status_code == 200:
+        return result.content
+    else:
+        logging.error("bitly had error %s , %s " % (result.status_code,result.content))
+        raise Exception("bitly had error %s , %s" % (result.status_code, result.content))
+
+# ------------------------------------------------------------------------
+def unu(url):
+    """ call u.nu url shortening service and shorten the given url """
+    full_link = "http://u.nu/unu-api-simple?" + urllib.urlencode({'url': url})
+    result = urlfetch.fetch(full_link,deadline=5)
+    if result.status_code == 200:
+        return result.content
+    else:
+        logging.error("u.nu had error %s " % result.status_code)
+        raise Exception("unu had error")
+        
+# ------------------------------------------------------------------------
+def isgd(url):
+    """ call is.gd url shortening service and shorten the given url """
+    full_link = "http://is.gd/api.php?" + urllib.urlencode({'longurl': url})
+    result = urlfetch.fetch(full_link,deadline=5)
+    if result.status_code == 200:
+        return result.content
+    else:
+        logging.error("is.gd had error %s " % result.status_code)
+        raise Exception("is.gd had error")
+
+# ------------------------------------------------------------------------
+def build_shorties():
+    """ build up the list of short url services we will use """
+    shorties = [unu,isgd]
+    if BITLY_LOGIN and BITLY_API_KEY:
+        shorties.append(bitly)
+    return shorties
+
+# ------------------------------------------------------------------------
+def shorten_url(url,count=None):
+    """ this will shorten the url, but choosing one of our short
+        url services are random """
+    if not count:
+        count=0
+    try:
+        return choice(build_shorties())(url)
+    except Exception, e:
+        logging.error("had error, try again %s " % e)
+        if count < SHORT_URL_NUM_RETRIES:
+            count += 1
+            shorten_url(url,count=count)
+        else:
+            raise Exception("Too Many errors, rain out of Retries")
+
+# ------------------------------------------------------------------------
+class TestShorty(webapp.RequestHandler):
+    """ I use this to test out the short url services """
+    def get(self):
+        urls = ('http://google.com',
+                'http://yahoo.com',
+                'http://kencochrane.net',
+                'http://zanbow.com',
+                'http://youtube.com',
+                'http://bing.com')
+        for url in urls:
+            shorty = shorten_url(url)
+            self.response.out.write("both: %s - %s <br />\n " % (url,shorty))
+        for url in urls:
+            shorty = bitly(url)
+            self.response.out.write("bitly: %s - %s <br />\n" % (url,shorty))
+        for url in urls:
+            shorty = unu(url)
+            self.response.out.write("u.nu: %s - %s <br />\n" % (url,shorty))
+        for url in urls:
+            shorty = isgd(url)
+            self.response.out.write("is.gd: %s - %s <br />\n" % (url,shorty))
+                
+        self.response.out.write("done!")
+
+# ------------------------------------------------------------------------
+# Handlers
+# ------------------------------------------------------------------------
+class Home(webapp.RequestHandler):
+    """ Home request handler, nothing now, but something in future """
+    def get(self):
+        self.response.out.write("Nothing Here yet!")
+
+# ------------------------------------------------------------------------
+class PosterBot(webapp.RequestHandler):
+    """ This is the guy that posts the messages to twitter, gets items from poster-queue """
+    def get(self):
+        self.response.out.write("Denied!")
+    def post(self): # should run at most 40/h
+        twitter_id = TWITTER_ID 
+        twitter_pwd = TWITTER_PWD
+        if TWITTER_ENABLED:
+            # May need to switch this out since I hear they are removing support for basic Auth
+            auth = tweepy.BasicAuthHandler(twitter_id, twitter_pwd)
+            bot = tweepy.API(auth)
+        key = self.request.get('key')
+        postie = PostDB.get(key)
+        if postie:
+            try:
+                logging.info(postie.status)
+                if TWITTER_ENABLED:
+                    bot.update_status(postie.status)
+                postie.failed = False
+                postie.put()
+            except TweepError, e:
+                logging.error("error = %s" % e)
+                postie.failed = True
+                postie.put()
+        else:
+            logging.info("postie is NONE?  %s " % postie)
+                
+# ------------------------------------------------------------------------
+class CronBot(webapp.RequestHandler):
+    """ cron bot, will run when scheduled and read the yahoo pipe feeds, add them to the fetcher-queue """
+    def get(self):
+        for feed in YAHOO_PIPE_URL:
+            taskqueue.Task(url='/worker/getter', params={'feed': feed}).add(queue_name='fetcher-queue')
+
+# ------------------------------------------------------------------------
+class FixerCron(webapp.RequestHandler):
+    """ If a post gets in an odd state this guy will find it, and attempt to fix it. """
+    def get(self):
+        query = PostDB.all()
+        query.filter('failed =',True)
+        results = query.fetch(25)
+        for result in results:
+            logging.info("Fix %s" % result)
+            taskqueue.add(url='/worker/poster', params={'key': result.key()})
+
+# ------------------------------------------------------------------------
+class ProcessBot(webapp.RequestHandler):
+    """ get items off the process-queue, shorten url, and add to database. and then add to poster-queue"""
+    def get(self):
+        self.response.out.write("Denied!")
+    def post(self):
+        title = self.request.get('title')
+        link = self.request.get('link')
+        shortUrl = shorten_url(link)
+        try:
+            title = unicode(title).encode('ascii','ignore')
+        except Exception, e:
+            logging.error("Error = %s " % e)
+            # if we have an error with encoding just skip for now.
+            # TODO: need to fix this, when I have more time.
+            return
+        status = "%s:- %s" % (title,shortUrl)
+        status = status.decode('utf-8').encode('ascii','ignore')
+        entry_hash = md5("%s-%s"% (title,link)).hexdigest()
+        newsb = PostDB(md5_hash=entry_hash,failed=False,status=status)
+        newsb.put()
+        taskqueue.Task(url='/worker/poster', params={'key': newsb.key()}).add(queue_name='poster-queue')
+
+# ------------------------------------------------------------------------
+class GetterBot(webapp.RequestHandler):
+    """ Get the feeds from the fetcher-queue, parse them and put the entries on the process queue"""
+    def get(self):
+        self.response.out.write("Denied!")
+        
+    def post(self):
+        feed = self.request.get('feed')
+        result = urlfetch.fetch(feed)
+        if result.status_code == 200:
+            data = result.content
+        else:
+            logging.error("error status code = %s" % result.status_code)
+            return
+        entries = json.loads(data)['value']['items']
+        
+        if len(entries) != 0:
+            entries.reverse()
+            add_count = 0
+            skip_count = 0
+            error_count = 0
+            for entry in entries:
+                try:
+                    title = str(unicode(entry['title']).encode("utf-8"))
+                    link = str(unicode(entry['link']).encode("utf-8"))
+                    #title = title.decode('utf-8').encode('ascii','ignore')
+                    title = unicode(title).encode('ascii','ignore')
+                except Exception, e:
+                    # if we have an error with encoding just skip it for now.
+                    logging.error("Error = %s " % e)
+                    continue
+                entry_hash = md5("%s-%s"% (title,link)).hexdigest()
+                if db.Query(PostDB).filter('md5_hash =',entry_hash).count() == 0:
+                    try:
+                        taskqueue.Task(url='/worker/processer', params={'title': title,'link':link}).add(queue_name='process-queue')
+                        add_count += 1
+                    except:
+                        logging.error("2. error usually unicode issue or shorturl issue")
+                        error_count += 1
+                else:
+                    skip_count += 1
+                    
+            self.response.out.write("Whoa! Done, finally!")
+            total_count = add_count + skip_count + error_count
+            logging.info("All Done! Total Count:%s , SKIP count:%s, ADD count: %s, ERROR count %s" % (total_count,skip_count,add_count,error_count))
+
+# ------------------------------------------------------------------------
+# Route table: maps each url path to the handler class that serves it.
+# The /cron/* paths are the ones listed in cron.yaml, the /worker/* paths
+# are the targets of the task queue tasks created in this file.
+# NOTE(review): debug=True presumably shows tracebacks in the browser --
+# confirm that is wanted before deploying.
+application = webapp.WSGIApplication(
+    [
+        ('/', Home),
+        ('/test/shorty', TestShorty),
+        ('/cron/fetch', CronBot),
+        ('/cron/fixer', FixerCron),
+        ('/worker/getter', GetterBot),
+        ('/worker/poster', PosterBot),
+        ('/worker/processer', ProcessBot),
+    ],debug=True)
+
+# ------------------------------------------------------------------------
+def main():
+    """ Hand the module level ``application`` to run_wsgi_app. """
+    run_wsgi_app(application)
+
+# ------------------------------------------------------------------------
+# only start serving when this file is run as the script
+if __name__ == "__main__":
+    main()
+application: #ADD YOUR APP NAME HERE
+version: #ADD YOUR VERSION HERE
+runtime: python
+api_version: 1
+
+handlers:
+- url: .*
+  script: app.py
+cron:
+- description: Fetcher Cron Job
+  url: /cron/fetch
+  schedule: every 30 mins
+- description: Fixer Cron
+  url: /cron/fixer
+  schedule: every 6 hours
+
+indexes:
+
+# AUTOGENERATED
+
+# This index.yaml is automatically updated whenever the dev_appserver
+# detects that a new type of query is run.  If you want to manage the
+# index.yaml file manually, remove the above marker line (the line
+# saying "# AUTOGENERATED").  If you want to manage some indexes
+# manually, move them above the marker line.  The index.yaml file is
+# automatically uploaded to the admin console when you next deploy
+# your application using appcfg.py.
+queue:
+- name: default
+  rate: 40/h
+- name: poster-queue
+  rate: 40/h
+- name: fetcher-queue
+  rate: 5/s
+- name: process-queue
+  rate: 45/m
+