1. Ned Batchelder
  2. lifeboat

Commits

Ned Batchelder  committed a9aaaf4

Now you can download other people's public tabblos.

  • Participants
  • Parent commits 8b9c7e5
  • Branches default

Comments (0)

Files changed (1)

File save_tabblos.py

View file
 from contextlib import closing
 
 MINE = "http://www.tabblo.com/studio/view/mine/"
+THEIRS = "http://www.tabblo.com/studio/view/tabblos/%s/"
 NAVBAR = '<a class="navbarlinks" href="http://www.tabblo.com/studio/person/%s">Hi '
 
+RE_MY_STORIES = r"javascript:Tabblo.site.deleteStory\((\d+),"
+RE_THEIR_STORIES = r"""<a (?:style="font-size:18px;font-family:verdana;"|class="little") href="/studio/stories/view/(\d+)/">"""
+
 EXTRA_CSS_SPOT = "</style><script>"
 
 EXTRA_CSS = """\
     ids_txt_fname = "ids.txt"
 
     def __init__(self, username, password, original, logfn):
-        self.username = username
+        if "@" in username:
+            self.login_name, self.username = username.split("@")
+            self.mine = False
+        else:
+            self.login_name = self.username = username
+            self.mine = True
         self.password = password
         self.original = original
         self.log = logfn
+
         self.logged_in = False
         self.skipped = []
 
         if login and not self.logged_in:
             # Visit /studio/login, don't need to do anything with the page, it will
             # set a cookie that future reads will use automatically.
-            p = urllib.urlencode({ 'username': self.username, 'password': self.password })
+            p = urllib.urlencode({ 'username': self.login_name, 'password': self.password })
             with closing(self.open_url('https://www.tabblo.com/studio/login/?s=1', "Logging in", p, login=False)) as f:
                 content = f.read()
                 # Why does tabblo.com not just return 302 for redirects??
                 if '<meta http-equiv="Refresh" content="0; url=http://www.tabblo.com/studio/?pi=3">' in content:
                     with closing(self.open_url('http://www.tabblo.com/studio/?pi=3', login=False)) as f:
                         content = f.read()
-                if (NAVBAR % self.username) not in content:
+                if (NAVBAR % self.login_name) not in content:
                     raise HarvesterException("Couldn't log in.")
             self.logged_in = True
 
         ids = []
 
         # Loop over all the pages of tabblos. 
-        url = MINE
+        if self.mine:
+            url0 = MINE
+            re_stories = RE_MY_STORIES
+        else:
+            url0 = THEIRS % self.username
+            re_stories = RE_THEIR_STORIES
+
+        url = url0
         while url:
             # Tabblo returns short pages sometimes!?
             for retry in range(10):
                 raise HarvesterException("Couldn't get a complete page, tried 10 times.")
 
             # Find tabblo ids by looking for a specific string in the URLs.
-            for id in re.findall(r"javascript:Tabblo.site.deleteStory\((\d+),", page):
+            for id in re.findall(re_stories, page):
                 ids.append(int(id))
 
             # Find the next page of tabblos.
-            match = re.search(r'<a href="/studio/view/mine/(\d+)">More</a>', page)
+            match = re.search(r'/(\d+)">More</a>&nbsp;&#xbb;', page)
             if match:
-                url = MINE + match.group(1)
+                url = url0 + match.group(1)
             else:
                 url = None
 
                 md['date'] = time.strftime("%b %d, %Y", time.strptime(md['created'], "%Y-%m-%dT%H:%M:%S")).replace(" 0", " ")
                 metadata[id] = md
 
-        tocs = [
-            # name          details     selector
-            ('all',         True,       lambda md: True),
-            ('published',   True,       lambda md: md['status'] == 'published'),
-            ('public',      False,      lambda md: md['status'] == 'published' and md['access'] == 'public'),
-            ]
+        if self.mine:
+            tocs = [
+                # name          details     selector
+                ('all',         True,       lambda md: True),
+                ('published',   True,       lambda md: md['status'] == 'published'),
+                ('public',      False,      lambda md: md['status'] == 'published' and md['access'] == 'public'),
+                ]
+        else:
+            tocs = [
+                ('public',      False,      lambda md: True),
+                ]
 
         for name, details, selector in tocs:
             # A list of all the HTML chunks for the tabblos.
             "up where it left off.\n"
             % (self.username,)
             )
-
         # To allow for more than one user on the same machine to save their tabblos,
         # cd into a subdirectory for the user.
         user_dir = re.sub(r"[^\w]", "", self.username)
         ids = self.get_ids()
         self.log("Found %d tabblos" % len(ids))
         self.download_tabblos(ids)
-        if self.skipped:
+        if self.mine and self.skipped:
             self.log("Skipped %d tabblos: %s." % (len(self.skipped), ", ".join(str(i) for i in self.skipped)))
             self.log("Correct those tabblos (if you haven't already deleted them) and rerun Lifeboat.")
             os.remove(self.ids_txt_fname)
         else:
+            ids = sorted(set(ids) - set(self.skipped))
             self.generate_toc(ids)