# Source

# metricbot / metricbot.py

# Full commit
import re
import math
import decimal
import time
import itertools

import quantities
from redditbot import RedditBot

import logging
# Send DEBUG-and-above records to a file named after this script with '.log'
# appended, one timestamped "[LEVEL] message" line per record.
logging.basicConfig(filename=__file__ + '.log', format='%(asctime)s [%(levelname)s] %(message)s', level=logging.DEBUG)

# Compound speed units built from `quantities` base units. Each is given an
# explicit display name, which nice_unit_name() returns unchanged for these two.
MPH = quantities.mile / quantities.hour
MPH.name = 'miles per hour'

KPH = quantities.km / quantities.hour
KPH.name = 'km/h'

# Maps the unit words recognised in text to the imperial unit they denote.
# number_unit_conversions() looks words up here exactly as written, so the
# match is case-sensitive and every accepted spelling needs its own entry.
IMP_UNITS = {
    'ft': quantities.ft,
    'foot': quantities.ft,
    'feet': quantities.ft,
    'lbs': quantities.lb,
    'inches': quantities.inch,
    'inch': quantities.inch,
    'gallon': quantities.gallon,
    'gallons': quantities.gallon,
    'miles': quantities.mile,
    'mile': quantities.mile,
    'mph': MPH,
    # Ounces are disabled: an "oz" may be a fluid ounce (volume) or a weight
    # ounce, and the text alone does not say which metric unit to use.
    #'oz': quantities.oz,
    #'ounce': quantities.oz,
    #'ounces': quantities.oz,
}

# Maps each imperial unit to the metric unit its amounts are rescaled to.
# quantities.oz has a target here even though no ounce word is currently
# enabled in IMP_UNITS.
SI_UNITS = {
    quantities.ft: quantities.m,
    quantities.lb: quantities.kg,
    quantities.inch: quantities.cm,
    quantities.gallon: quantities.liter,
    quantities.mile: quantities.km,
    quantities.oz: quantities.gram,
    MPH: KPH,
}

# Substrings taken as a hint that the text already carries metric values.
# suspected_converted_already() does a plain, case-sensitive substring test,
# so e.g. 'gram' also matches inside "program".
IGNORE_IF_CONTAINS = 'meter', 'kg', 'cm', 'liter', 'gram', 'm/s'

def nice_number_str(number):
    """Format *number* compactly for display in a conversion reply.

    The format depends on the magnitude of *number*:

    * ``abs(number) > 1e20``: two significant digits in exponent notation.
    * ``abs(number) > 1e3``: rounded to an integer, with thousands separators.
    * ``abs(number) > 1``: two decimal places.
    * ``0 < abs(number) <= 1``: rounded to two significant digits with
      trailing zeroes removed (0.00001234521 becomes 0.000012).
    * zero: the string ``'0'``.

    Returns the formatted string.
    """
    magnitude = abs(number)
    if magnitude > 1e20:
        return '{:,.2g}'.format(number)
    if magnitude > 1e3:
        return '{:,.0f}'.format(number)
    if magnitude > 1:
        return '{:.2f}'.format(number)
    if magnitude == 0:
        # Zero has no leading significant digit, so the logarithm below is
        # undefined for it (math.log10(0) raises ValueError).
        return '0'

    # 0 < |number| <= 1: find the decimal position of the first significant
    # digit and keep one more digit after it.
    exponent = int(math.floor(math.log10(magnitude)))
    prec_fac = decimal.Decimal(10) ** (exponent - 1)
    rounded = decimal.Decimal(number).quantize(prec_fac)
    # normalize removes trailing zeroes (eg 0.30 turns into 0.3)
    return '{0}'.format(rounded.normalize())
    

def suspected_converted_already(text):
    """Return True if *text* contains any of the IGNORE_IF_CONTAINS substrings.

    A hit suggests the author already included metric values, so the caller
    can skip replying. The check is a plain substring test.
    """
    return any(metric_word in text for metric_word in IGNORE_IF_CONTAINS)
    

def nice_unit_name(unit):
    """Return the display name used for *unit* in a reply.

    KPH and MPH are shown by their name as-is; every other unit gets an 's'
    appended to its name to form a plural.
    """
    is_speed_unit = unit in (KPH, MPH)
    return unit.name if is_speed_unit else unit.name + 's'

def number_unit_conversions(text):
    """Yield (source, converted) string pairs for "<number> <unit>" in *text*.

    A candidate is a run of digits, commas, dots and slashes, optionally
    followed by one space, then a run of letters, hyphens and slashes. Only
    candidates whose word is a key of IMP_UNITS are converted. Commas are
    dropped from the number before parsing, and candidates whose number still
    does not parse as a float are skipped.

    The source string is the number as written plus the unit word; the
    converted string is the formatted metric amount plus the metric unit name.
    """
    for match in re.finditer(r'([/\d,\.]+)[ ]?([a-zA-Z\-/]+)', text):
        raw_number, unit_word = match.groups()
        if unit_word not in IMP_UNITS:
            continue

        try:
            value = float(raw_number.replace(',', ''))
        except ValueError:
            # bad number eg '0.,0'
            continue

        imperial = IMP_UNITS[unit_word]
        metric = SI_UNITS[imperial]
        metric_value = float((value * imperial).rescale(metric))
        formatted = nice_number_str(metric_value)

        original_text = '%s %s' % (raw_number, unit_word)
        converted_text = '%s %s' % (formatted, nice_unit_name(metric))
        yield original_text, converted_text

def foot_inch_apostrophe_conversions(text):
    """Yield (source, converted) string pairs for heights written like 5'11".

    Each match is digits, an apostrophe, digits and a double quote. Feet and
    inches are added together and expressed in meters. The source string is
    rebuilt from the parsed integers, so leading zeroes are not preserved.

    A ValueError raised anywhere while handling a match causes that match to
    be skipped.
    """
    for match in re.finditer(r'''(\d+)'(\d+)"''', text):
        ft_digits, in_digits = match.groups()
        try:
            whole_feet = int(ft_digits)
            whole_inches = int(in_digits)
            length = quantities.ft * whole_feet + quantities.inch * whole_inches
            in_meters = float(length.rescale(quantities.m))
            formatted = nice_number_str(in_meters)

            original_text = '''%d'%d"''' % (whole_feet, whole_inches)
            converted_text = '%s %s' % (formatted, 'meters')
            yield original_text, converted_text
        except ValueError:
            # bad number eg '0.,0'
            continue
            

def find_imperials(text):
    """Return a list of "<imperial> = <metric>" lines for amounts in *text*.

    Results of number_unit_conversions() come first, followed by those of
    foot_inch_apostrophe_conversions(). A conversion whose metric side has
    already been emitted is dropped, so each converted value appears once.
    """
    candidates = itertools.chain(
        number_unit_conversions(text),
        foot_inch_apostrophe_conversions(text),
    )

    lines = []
    emitted = set()
    for src, dst in candidates:
        if dst not in emitted:
            emitted.add(dst)
            lines.append('%s = %s' % (src, dst))
    return lines


def metric_response(text):
    """Return the reply body for *text*, or '' when no reply should be made.

    The reply is one "<imperial> = <metric>" line per conversion, separated
    by blank lines. An empty string is returned when *text* looks like it
    already contains metric values or when nothing convertible is found.
    """
    # Check the cheap guard first: there is no point scanning and converting
    # text whose result would be thrown away.
    if suspected_converted_already(text):
        return ''
    return '\n\n'.join(find_imperials(text))

# Throttle timings, in seconds (they are compared against time.time() deltas).
HOUR = 60 * 60
# How long an identical reply text stays blocked after it was last posted.
MIN_TIME_FOR_REPEAT = 6 * HOUR
# Minimum gap between any two posts accepted by a Throttle.
MIN_DT_FOR_POST = 1 * HOUR

class Throttle:
    """Rate limiter for replies, keyed on the reply text.

    A post is refused when any post was accepted less than MIN_DT_FOR_POST
    seconds ago, or when the same text was accepted no more than
    MIN_TIME_FOR_REPEAT seconds ago.
    """

    def __init__(self):
        # Time of the most recently accepted post, or None before the first.
        self.last = None
        # Maps each accepted reply text to the time it was last accepted.
        self.d = {}

    def is_ok_to_post(self, text):
        """Return True and record the post if *text* may be posted now.

        A refused call returns False and leaves the recorded times untouched.
        """
        now = time.time()

        posted_too_recently = (
            self.last is not None and now - self.last < MIN_DT_FOR_POST
        )
        if posted_too_recently:
            return False

        # text shouldn't repeat itself in a few hours
        previous = self.d.get(text)
        if previous is not None and now - previous <= MIN_TIME_FOR_REPEAT:
            return False

        self.last = now
        self.d[text] = now
        return True

# Single module-wide throttle; MetricBot.handle_comment consults it before
# replying to a comment.
globthrot = Throttle()

class MetricBot(RedditBot):
    """Bot that answers comments and link titles with metric conversions.

    Both handlers return True when a reply was posted and False otherwise.
    """

    # NOTE(review): presumably read by RedditBot to skip these subreddits;
    # confirm against the base class.
    avoid_subreddits = ['acadiana']

    def handle_comment(self, comm):
        """Reply to *comm* with conversions of its body, subject to throttling."""
        self.log('body: %s' % comm.body)
        response = metric_response(comm.body)

        # The throttle is only consulted (and only records a post) when there
        # is something to say.
        if response == '' or not globthrot.is_ok_to_post(response):
            return False

        self.log('comment: %s' % comm.permalink)
        self.log('cresponse: %s' % response)
        comm.reply(response)
        return True

    def handle_link(self, link):
        """Comment on *link* with conversions of its title."""
        self.log('title: ' + link.title)
        response = metric_response(link.title)
        if response == '':
            return False

        # NOTE: links don't need a global throttle as you don't have the risk of spamming a thread
        self.log('link: ' + link.permalink)
        self.log('tresponse: %s' % response)
        link.add_comment(response)
        return True
    
    
def main():
    """Create a MetricBot from the contents of pw.txt and run its main loop.

    Raises whatever ``open`` raises if pw.txt cannot be read from the current
    working directory.
    """
    # Use a context manager so the file handle is closed as soon as the
    # password has been read rather than whenever it gets garbage-collected.
    with open('pw.txt') as pw_file:
        # Drop the line ending that editors append; it is not part of the
        # password.
        pw = pw_file.read().rstrip('\r\n')
    bot = MetricBot('metricbot', pw)
    bot.mainloop()
    
    
    
    
    
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Record the traceback in the log file, then re-raise so the process
        # still terminates with the original error. KeyboardInterrupt and
        # SystemExit are not Exception subclasses, so an intentional shutdown
        # is no longer logged as an error.
        logging.exception('metricbot terminated by an unhandled exception')
        raise