# Source: metricbot / redditbot.py (full commit)
import logging
import shelve
import sys
import time
import urllib2

import reddit

# Value passed as ``limit=`` to the reddit listing calls in
# RedditBot.feed_comments() and RedditBot.feed_titles().
LIMIT = 100



class RedditBot:
    """Base class for a bot that scans new reddit comments and links.

    Subclasses override handle_comment() and handle_link(); each should
    return a true value when the bot replied to the item.  Items that
    were replied to are recorded in a shelve database so that they are
    skipped on later scans.
    """

    # Subreddits to skip.  Compared against the lower-cased
    # ``subreddit.title`` of every scanned item.  Subclasses may override.
    avoid_subreddits = []

    def __init__(self, user, pw):
        """Store the credentials and open the per-user shelve database.

        user -- account name; also used to build the database file name
                (user + '.py.db') and to recognise the bot's own posts.
        pw   -- password handed to the reddit login in mainloop().
        """
        self.user = user
        self.pw = pw
        self.db = shelve.open(self.user + '.py.db')

        # Normalise once so the per-item check is a plain lower-case
        # membership test.
        self.avoid_subreddits = [name.lower() for name in self.avoid_subreddits]

    def log(self, text):
        """Write text to the debug log."""
        # print text # caused unicode bugs
        logging.debug(text)

    def error(self, text):
        """Write text to the error log."""
        logging.error(text)

    def handled_in_past(self, place_holder):
        """Return True if place_holder is already recorded in the database."""
        return place_holder.encode('utf-8') in self.db

    def report_handled(self, obj, place_holder):
        """Record place_holder as handled and print obj's permalink."""
        permalink = obj.permalink
        self.db[place_holder.encode('utf-8')] = True
        # The shelf is never closed by this class, so flush right away;
        # otherwise a killed process could forget what it already handled.
        self.db.sync()
        print('')
        print('handled:\n%s' % permalink)

    def handle_time_block(self, e):
        """Log a rate-limit error and pause for ten minutes."""
        print('time limit blocked')
        self.error('NOT POSTED: %s' % e)
        time.sleep(10 * 60)

    def is_me(self, author):
        """Return True if author is this bot's own account.

        author may be a plain string or an object with a ``name``
        attribute.  ``None`` is treated as "not me" instead of raising.
        NOTE(review): presumably None shows up for deleted authors --
        confirm against the reddit client library in use.
        """
        if author is None:
            return False

        if isinstance(author, (type(u''), type(''))):
            # this is an annoying little bug: sometimes the author is
            # already a bare string rather than an object.
            name = author
        else:
            name = author.name

        return name.lower() == self.user.lower()

    def statistics(self, obj_list, type_name, responded):
        """Log and print a one-line summary of a scan.

        obj_list  -- scanned items; each must expose ``created_utc``.
        type_name -- label used in the message ('comments', 'links').
        responded -- number of items the bot replied to.
        """
        if not obj_list:
            self.log('handled 0 %s' % type_name)
            return

        times = [obj.created_utc for obj in obj_list]
        span = max(times) - min(times)
        if span:
            # float() keeps the rate fractional even for int timestamps.
            rate = len(times) / float(span)
        else:
            # A single item (or identical timestamps) gives a zero span;
            # a rate cannot be computed, so report that instead of
            # dividing by zero.
            rate = 'n/a'
        log_line = 'scanned %d %s, replied to %d. Reddit rate = %s per second' % (
            len(times), type_name, responded, rate)
        self.log(log_line)
        print(log_line)

    def _progress(self, marker):
        """Write a progress marker followed by a space, without a newline."""
        sys.stdout.write(marker + ' ')

    def _should_skip(self, obj, place_holder):
        """Return True for own posts, already-handled items and avoided subreddits."""
        if self.is_me(obj.author):
            return True
        if self.handled_in_past(place_holder):
            return True
        return obj.subreddit.title.lower() in self.avoid_subreddits

    def _safe_handle(self, handler, obj):
        """Call handler(obj) and return its result; False on a reddit API error."""
        try:
            return handler(obj)
        except reddit.errors.RateLimitExceeded as e:
            self.handle_time_block(e)
        except reddit.errors.APIException as e:
            # eg comment deleted
            self.error('reddit.errors.APIException: ' + str(e))
        return False

    def feed_comments(self):
        """Scan one batch of comments and pass each new one to handle_comment().

        Requires self.r and self.comment_place_holder, which mainloop() sets.
        """
        r = self.r
        comments = []
        responded = 0
        listing = r.get_all_comments(limit=LIMIT,
                                     place_holder=self.comment_place_holder)
        for comm in listing:
            # next time get the rest of the comments after this one.
            self.comment_place_holder = comm.name

            self._progress('c')
            if self._should_skip(comm, self.comment_place_holder):
                continue

            # handle
            comments.append(comm)
            if self._safe_handle(self.handle_comment, comm):
                responded += 1
                self.report_handled(comm, self.comment_place_holder)

        print('')
        self.statistics(comments, 'comments', responded)

    def feed_titles(self):
        """Scan one batch of new links and pass each new one to handle_link().

        Requires self.r and self.link_place_holder, which mainloop() sets.
        """
        r = self.r
        links = []
        responded = 0
        listing = r.get_subreddit('all').get_new(limit=LIMIT,
                                                 url_data={"sort": "new"},
                                                 place_holder=self.link_place_holder)
        for link in listing:
            self.link_place_holder = link.id
            self._progress('l')
            if self._should_skip(link, self.link_place_holder):
                continue

            # handle
            links.append(link)
            if self._safe_handle(self.handle_link, link):
                responded += 1
                self.report_handled(link, self.link_place_holder)

        print('')
        self.statistics(links, 'links', responded)

    def mainloop(self):
        """Log in and alternate comment and link scans forever.

        An HTTP error aborts the current pass and adds a one-minute
        pause on top of the regular ten-minute wait between passes.
        """
        self.log('start')
        r = reddit.Reddit(user_agent="metricbot_comment_generator")
        self.r = r
        r.login(self.user, self.pw)
        self.log('logged in')
        self.comment_place_holder = None
        self.link_place_holder = None

        while True:
            try:
                self.feed_comments()
                self.feed_titles()
            except urllib2.HTTPError as e:
                self.error('urllib2.HTTPError: %s' % e)
                # is reddit down?
                time.sleep(60)

            time.sleep(600)

    def handle_comment(self, comm):
        """Hook for subclasses: process a comment; return true if replied."""
        pass

    def handle_link(self, link):
        """Hook for subclasses: process a link; return true if replied."""
        pass