Commits

Benoit Boissinot committed dac1f1b

add support for textsfromlastnight.com (scraping)

Comments (0)

Files changed (1)

         self.randomData = {'qdb.us':[],
                             'bash.org':[],
                             'viedemerde.fr':[],
-                            'fmylife.com':[]
+                            'fmylife.com':[],
+                            'textsfromlastnight.com':[],
                             }
 
     def callCommand(self, method, irc, msg, *L, **kwargs):
             irc.error(str(e))
 
     _joiner = ' // '
+    _tflnReString = r'<div class="post_wrap" id="entry_(?P<id>\d+)">.*?'\
+                    r'<div class="post_content">(?P<text>.*?)</div>.*?'\
+                    r'<span id="postingGoodCount_.*?">(?P<up>\d+)</span>.*?'\
+                    r'<span id="postingBadCount_.*?">(?P<down>\d+)</span>'
     _qdbReString = r'<tr><td bgcolor="#(?:ffffff|e8e8e8)"><a href="/\d*?">'\
                     r'#\d*?</a>.*?<p>(?P<text>.*?)</p></td></tr>'
     _gkREDict = {'bash.org': re.compile(r'<p class="qt">(?P<text>.*?)</p>',
                     re.M | re.DOTALL),
-                'qdb.us': re.compile(_qdbReString, re.M | re.DOTALL),}
+                'qdb.us': re.compile(_qdbReString, re.M | re.DOTALL),
+                'textsfromlastnight.com':
+                    re.compile(_tflnReString, re.M | re.DOTALL),}
     _betacieUrl = ('http://api.betacie.com/view/'
                    '%(id)s/nocomment?key=readonly&language=%(lang)s')
 
     def _gkBackend(self, irc, msg, site, id):
         if not id:
             id = 'random'
-        fetchData = True
         quote = ''
         if id == 'random':
             timeRemaining = int(time.time()) - self.lastqdbRandomTime
             else:
                 lang = 'en'
             url = self._betacieUrl % {'id': id, 'lang': lang}
+        elif site == 'textsfromlastnight.com':
+            url = 'http://%s/%s%s' % (site,
+                                      not random and 'view/' or '',
+                                      id)
         else:
             url = 'http://%s/?%s' % (site, id)
         html = ''
                 s = "%s #%s (+%s,-%s)" % (t, id, up, down)
                 if random and s not in self.randomData[site]:
                     self.randomData[site].append(s)
+        elif site == 'textsfromlastnight.com':
+            # Each regex match is one post: id, HTML body, up/down vote counts.
+            for item in self._gkREDict[site].finditer(html):
+                d = item.groupdict()
+                # NOTE(review): this logs the whole page at info level for
+                # every match; looks like leftover debugging output.
+                self.log.info('%s %s', d, repr(html))
+                t = d['text']
+                t = utils.web.htmlToText(t, tagReplace='').strip()
+                # Collapse the multi-line text of *this* post onto one line.
+                # (Previously joined `s.splitlines()`, which threw away the
+                # freshly cleaned text and reused a stale/undefined `s`.)
+                t = self._joiner.join(t.splitlines())
+                up = d['up']
+                down = d['down']
+                id = d['id']
+                s = "%s #%s (+%s,-%s)" % (t, id, up, down)
+                if random and s:
+                    # Random mode: cache every distinct quote found on the page.
+                    if s not in self.randomData[site]:
+                        self.randomData[site].append(s)
+                else:
+                    # Specific id requested: the first match is the answer.
+                    break
         else:
             for item in self._gkREDict[site].finditer(html):
                 s = item.groupdict()['text']
             return s
 
     def geekSnarfer(self, irc, msg, match):
-        r'http://(?:www\.)?(?P<site>bash\.org|qdb\.us|viedemerde\.fr|fmylife\.com)/\??(?P<id>\d+)'
+        r'http://(?:www\.)?(?P<site>bash\.org|qdb\.us|viedemerde\.fr|fmylife\.com|textsfromlastnight\.com)/\??(?P<id>\d+)'
         if not self.registryValue('geekSnarfer', msg.args[0]):
             return
         id = match.groupdict()['id']
         self._gkBackend(irc, msg, site, id)
     fml = wrap(fml, [additional(('id', 'fml'))])
 
+    def tfln(self, irc, msg, args, id):
+        """[<id>]
+
+        Returns a random quote from textsfromlastnight.com; the optional
+        argument <id> specifies which quote to retrieve.
+        """
+        # Thin command wrapper: only the site is chosen here; URL building,
+        # fetching and parsing are done by _gkBackend, which substitutes
+        # 'random' when id is empty.
+        site = 'textsfromlastnight.com'
+        self._gkBackend(irc, msg, site, id)
+    # NOTE(review): presumably `additional` makes the <id> argument optional,
+    # matching the "[<id>]" usage line -- confirm against the wrap() helpers.
+    tfln = wrap(tfln, [additional(('id', 'tfln'))])
+
 Class = Geekquote