Commits

dan mackinlay committed 635c6c7

clone project in from possumpalace_kit - https://bitbucket.org/howthebodyworks/possumpalace_kit

  • Participants
  • Parent commits 93cd2bd

Comments (0)

Files changed (12)

File src/citeulike_api/README.rst

+========================
+CiteULike Client
+========================
+
+
+a python client for Citeulike.
+
+This implements a loose CiteULike interface according to an API pieced together from forum comments made by the CUL staff over time.
+
+e.g.
+JSON searches http://www.citeulike.org/groupforum/2253
+form submission API http://www.citeulike.org/groupforum/700
+Edit URLs and JSON fields: http://www.citeulike.org/groupforum/2312

File src/citeulike_api/TODO.rst

+======
+TODOs
+======
+
+  * test (ha!)
+  * document (ha!)
+  * package just the citeulike code into an easy_installable thing
+  * parse success of edit and login operations
+  * CUL doesn't like more than one request every 5 seconds. Currently i use a
+    wait_for_api_limit() method to throttle connections, but this is error-prone
+    - it would be better to subclass mechanize and enforce it there, plus
+    include a backoff (see next)
+  * The server is patchily available, at least from my ISP, so we should also
+    override the fetch methods to do retry with automatic backoff, e.g.
+    * https://gist.github.com/728327
+    * http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+  * downloaded PDFs should link back to their CUL page
+  * delete unused attachments. properly. as opposed to in code comments.
+  * cache cookies to save a page load

File src/citeulike_api/citeulike_api.py

+# encoding: utf-8
+"""
+fetch a supplied user's bookmarks somewhere, optionally writing them out into a file
+"""
+from string import Template
+import re
+import codecs
+from string import capwords
+from copy import deepcopy
+import importlib
+try:
+    import simplejson as json
+except ImportError:
+    import json
+from calendar import month_name
+from util import get_code_revision
+import time
+import mechanize
+import os.path
+import hashlib
+import openmeta
+from trasher import trash
+import glob
+
def sha1_digest(userfiles):
    """Return the hex SHA-1 digest of a byte string."""
    digest = hashlib.sha1()
    digest.update(userfiles)
    return digest.hexdigest()
+    
+#from the CUL webform
+# <option value="BOOK" >Book</option> 
+# <option value="CHAP" >Book chapter/section</option> 
+# <option value="PAMP" >Booklet</option> 
+# <option value="INCOL" >Book part (with own title)</option> 
+# <option value="INCONF" >Conference proceedings (article)</option> 
+# <option value="CONF" >Conference proceedings (whole)</option> 
+# <option value="DATA" >Data file</option> 
+# <option value="ELEC" >Electronic citation</option> 
+# <option value="INPR" >In the press</option> 
+# <option value="JOUR" selected="selected">Journal article</option> 
+# <option value="MANUAL" >Manual (technical documentation)</option> 
+# <option value="GEN" >Miscellaneous</option> 
+# <option value="REP" >Technical report</option> 
+# <option value="STD" >Technical Standard</option> 
+# <option value="MTHES" >Thesis (Master's)</option> 
+# <option value="THES" >Thesis (PhD)</option> 
+# <option value="UNPB" >Unpublished work</option> 
+
# Map from CUL's internal article-type codes (the <option value="..."> values
# in the webform excerpt above) to the corresponding BibTeX entry types.
CUL_FIELDS = {
  'JOUR': 'article',
  'BOOK': 'book',
  'PAMP': 'booklet',
  'CHAP': 'inbook',
  'INCOL': 'incollection',
  'INCONF': 'inproceedings',
  'MANUAL': 'manual',
  'MTHES': 'mastersthesis',
  'THES': 'phdthesis',
  'CONF': 'proceedings',
  'REP': 'techreport',
  'STD': 'techreport',
  'UNPB': 'unpublished',
  # '?patent': 'patent',
  # '?collection': 'collection',
  'ELEC': 'electronic',
  'GEN': 'misc',
  'DATA': 'misc',
  'INPR': 'misc',
}

# JSON-field-name -> edit-form-field-name, for the fields that CUL is known
# to wrap in spurious braces (consumed by CiteULike.kill_braces and the
# BRACE_IN_QUESTIONABLE_LOCATION filter below).
FIELDS_THAT_GET_SPURIOUS_BRACES = {
  "journal": "journal",
  "authors": "authors",
  "abstract": "abstract",
  "title": "title",
  "location": "location",
  "address": "address",
  "booktitle": "title_secondary"
}

#anglocentric stopwords
# Initial-capped forms only: stripped_word() caps tokens before the
# membership test in topwords_iter().
STOP_WORDS = set([
  'In', 'To', 'For', 'At', 'With', 'A', 'An', 'And', 'The', 'On', 'By', 'Of'
])

# URL templates for the CUL endpoints used below.
# NOTE(review): _edit_priority_url contains no ${placeholders} -- the
# user_article_id, article_id and callback values are hard-coded, so this
# looks like a captured example rather than a usable template. Confirm
# before relying on it.
_edit_priority_url = Template(
  "http://www.citeulike.org/editpriority.json?"
  "user_article_id=579067&article_id=1137810&"
  "to_read=4&callback=jsonp1282733494417"
)
_all_bibtex_t = Template(
  "http://www.citeulike.org/bibtex/user/${username}"
  "?key_type=4&clean_urls=0"
)
_all_json_t = Template(
  "http://www.citeulike.org/json/user/${username}"
)
_article_edit_t = Template(
  "http://www.citeulike.org/edit_article_details?"
  "username=${username}&article_id=${article_id}"
)
_article_view_t = Template(
  "http://www.citeulike.org/user/${username}/article/${article_id}"
)
_attachment_t= Template(
  "http://www.citeulike.org${path}"
)

_base_url = 'http://www.citeulike.org/'

# module-level singleton client, managed by get_cul()
_cul = None
+
class CulError(Exception):
    """Raised for failed or unrecognised CiteULike interactions."""
+
+# handy filters
def BRACE_IN_TITLE(rec):
    """Record filter: true when the title contains a literal '{'."""
    return '{' in rec['title']
+
def BRACE_IN_QUESTIONABLE_LOCATION(rec):
    """Record filter: true when any brace-prone field contains a '{'.

    Checks every key of FIELDS_THAT_GET_SPURIOUS_BRACES; string-valued
    fields are tested directly, anything else is assumed to be an
    iterable of strings (e.g. the authors list) and tested element-wise.
    """
    field_vals = []
    for field in FIELDS_THAT_GET_SPURIOUS_BRACES:
        field_val = rec.get(field)
        if field_val is None: continue
        # Python 2 ``basestring`` covers both str and unicode
        if isinstance(field_val, basestring):
            field_vals.append(field_val)
        else:
            for sub_val in field_val:
                field_vals.append(sub_val)
    return any([
      field_val.find('{')>=0
      for field_val in field_vals
    ])
+
def HAS_SHOUT_CAPS(rec):
    """Record filter: true when the title is entirely upper-case."""
    title = rec['title']
    return title == title.upper()
+
def get_browser(debug=False):
    """create a browser, optionally with debugging enabled"""
    import socket
    # NOTE: process-wide side effect -- every socket created in this
    # process from now on gets a 20 second timeout, not just this browser's
    socket.setdefaulttimeout(20.0)
    import mechanize
    
    browser = mechanize.Browser()
    
    if debug:
        import logging
        import sys
        # Log information about HTTP redirects and Refreshes.
        browser.set_debug_redirects(True)
        # Log HTTP response bodies (ie. the HTML, most of the time).
        browser.set_debug_responses(True)
        # Print HTTP headers.
        browser.set_debug_http(True)

        # To make sure you're seeing all debug output:
        logger = logging.getLogger("mechanize")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.INFO)
    return browser
+
def fetch_all_bibtex(username="livingthingdan", password=None, filepath=None):
    """Fetch a whole library in bibtex format.

    Uses the shared CiteULike client; authenticates when a password is
    supplied, and optionally dumps the result to *filepath*.
    """
    client = get_cul(username, password)
    return client.download_bibtex(dumpfile=filepath)
+
def fetch_all_json(username="livingthingdan", password=None, filepath=None):
    """Fetch a whole library in JSON format via the shared client."""
    return get_cul(username, password).download_json_index(json_cache=filepath)
+
def tokenise(f):
    """Split *f* on runs of hyphens/whitespace, discarding empty tokens."""
    return list(filter(None, re.split(r'[-\s]+', f)))
+    
def topwords_iter(f):
    """Yield the cleaned-up, non-stopword tokens of *f*."""
    for token in tokenise(f):
        word = stripped_word(token)
        if word not in STOP_WORDS:
            yield word
+
def topwords(f):
    """Return the cleaned-up, non-stopword tokens of *f* as a list."""
    return [word for word in topwords_iter(f)]
+
def get_sirname(creators):
    """Return the cleaned final token (surname) of the first creator.

    (The misspelling "sirname" is kept for backward compatibility.)
    """
    first_creator = creators[0]
    return stripped_word(tokenise(first_creator)[-1])
+    
def stripped_word(w):
    """Strip all non-word characters from *w* and initial-cap the result."""
    cleaned = re.sub(r'[^\w]*', '', w)
    return capwords(cleaned)
+
def strip_html_tags(value):
    """Returns the given HTML with all tags stripped.

    Coerces the input to unicode (Python 2 built-in) first; the non-greedy
    pattern removes each adjacent tag individually.
    """
    return re.sub(r'<[^>]*?>', '', unicode(value))
+
def create_key(obj):
    """create a citeulike-like AuthorYearTitle key from a record.
    probably not the same as theirs, which handles name prefixes better."""
    key = []
    # prefer authors, fall back to editors, then a literal "Anonymous"
    creators = obj.get('authors', obj.get('editors', ['Anonymous']))
    key.append(get_sirname(creators))
    # 'published' appears to be a [year, month, day] list of strings
    # (see test_citeulikeapi.py); only the year goes into the key
    if obj.get('published', False):
        key.append(obj['published'][0])
    # first non-stopword title token (Python 2 generator ``.next()``)
    key.append(topwords_iter(obj['title']).next())
    return ''.join(key)
+    
def strip_wrapping_braces(field):
    """Strip any leading/trailing mix of braces and whitespace from *field*."""
    without_leading = re.sub(r'^[{\s]*', '', field)
    return re.sub(r'[}\s]*$', '', without_leading)
+
def strip_braces(field):
    """Remove every '{' and '}' anywhere in *field*."""
    for brace in ('{', '}'):
        field = field.replace(brace, '')
    return field
+
def strip_tex_nonsense(ustring):
    """Drop backslashes, braces and double quotes from *ustring*."""
    for char in ('\\', '{', '}', '"'):
        ustring = ustring.replace(char, '')
    return ustring
+    
def open_article_for_edit(article_id, username, password):
    """Open the CUL edit form for *article_id* in the user's web browser."""
    import webbrowser
    import urlparse  # Python 2 module (urllib.parse in py3)
    cul = get_cul(username, password)
    edit_path = cul.get_edit_url(article_id)
    edit_url = urlparse.urljoin(_base_url, edit_path)
    webbrowser.open_new_tab(edit_url)
+
def get_cul(username, *args, **kwargs):
    """Return the shared CiteULike client, creating it on first use.

    Subsequent calls ignore the arguments and hand back the cached
    instance.
    """
    global _cul
    if not _cul:
        _cul = CiteULike(username, *args, **kwargs)
    return _cul
+    
def auto_edit_article(article_id, username, password):
    """Blank the bibtex cite key of *article_id* via the shared client."""
    get_cul(username, password).kill_cite_key(article_id)
+
def clean_article(obj):
    """Return a deep copy of the record with known CUL fuckups corrected
    (currently: braces stripped from author/editor names)."""
    cleaned = deepcopy(obj)
    for creator_field in ('authors', 'editors'):
        creators = cleaned.get(creator_field)
        if creators:
            cleaned[creator_field] = [strip_braces(name) for name in creators]
    return cleaned
+
class CiteULike(object):
    """Stateful client for the CiteULike site.

    Owns a mechanize browser, throttles requests (MIN_API_WAIT),
    optionally logs in, and caches the account's JSON record index for
    lookup, rendering and batch-editing operations.
    """
    
    # CUL doesn't like more than one request every 5 seconds (see TODO.rst)
    MIN_API_WAIT = 5
    
    def __init__(self, username,
          password=None,
          json_cache=None,
          attachment_path=None,
          debug=False,):
        """set up an object to access CUL

        If you omit the password but supply the path to a cached copy of
        the JSON output, then you can still do useful things in an
        "offline mode".
        """
        # first set up the socket handling to a value reflective of CiteULike's
        # somewhat frequent, er, network difficulties
        
        #set up state for anonymous use
        self.username = username
        self.logged_in = False
        self.debug = debug
        
        browser = get_browser(debug=debug)
        # CUL's robots.txt would otherwise block a mechanize Browser
        browser.set_handle_robots(False)
        user_agent_string = "CiteULikeApi %s" % get_code_revision()
        browser.addheaders = [
          ("User-agent", user_agent_string),
        ]
        self.browser = browser
        
        # backdate the stamp so the first request doesn't have to wait
        self.last_api_access = time.time() - self.MIN_API_WAIT
        
        #now, if password is provided, log in
        if password is not None:
            self.login(username, password)
        
        # set up record cache
        self.records = []
        self.json_cache = json_cache
        
        # if we ARE set up to use a cache file, try to load its contents now
        if json_cache:
            self.load_json_from_cache(json_cache)
        
        self.attachment_path = attachment_path
        if attachment_path is not None:
            self.attachment_path = os.path.realpath(attachment_path)
        
        # tag downloaded files via OpenMeta when available, else a no-op
        if openmeta.is_openmeta_working():
            self.tagger = openmeta.set_tags
        else:
            def _dummy_tagger(*args, **kwargs):
                pass
            self.tagger = _dummy_tagger
    
    def wait_for_api_limit(self, min_wait=0):
        """Sleep just long enough that successive requests are at least
        max(min_wait, MIN_API_WAIT) seconds apart, then stamp the time."""
        min_wait = max(min_wait, self.MIN_API_WAIT)
        now = time.time()
        elapsed_time = now - self.last_api_access
        if elapsed_time<min_wait:
            time.sleep(min_wait-elapsed_time)
        self.last_api_access = time.time()
    
    def login(self, username, password):
        """Log in through the CUL web form.

        Success or failure is detected from the redirect target; raises
        CulError on a failed or unrecognised login response.
        """
        browser = self.browser
        
        browser.open('http://www.citeulike.org/login?from=/')
        self.wait_for_api_limit()
        
        browser.select_form(name='frm')
        browser['password'] = password
        browser['username'] = username
        
        self.wait_for_api_limit()
        
        try:
            #handle redirects manually to avoid connection flakiness
            browser.set_handle_redirect(False)
            resp = browser.submit()
        except mechanize.HTTPError, e:
            # with redirects disabled, a 302 is the expected outcome;
            # anything else is a genuine HTTP error
            if e.getcode()!=302 : raise e
            next_page = e.info().getheader('Location')
            if next_page == 'http://www.citeulike.org/' :
                #success
                self.logged_in = True
            elif next_page.find('status=login-failed')>=0:
                raise CulError('Login Failed')
            else:
                err = CulError('Unknown login response')
                err.data = e
                raise err
        finally:
            browser.set_handle_redirect(True)
        
    def get_edit_url(self, article_id):
        """Return the edit-form URL for *article_id* under this username."""
        return _article_edit_t.substitute(
          username = self.username,
          article_id=article_id)
        
    def cache_records(self, force_update=False):
        """update myself with all relevant records for rapid searching"""
        if not self.records or force_update:
            self.records = self.download_json_index()
        self.article_id_index()
        return self.records
    
    def render(self, renderer_module='bibtex', ids=None,
            attachment_path=None, *args, **kwargs):
        """Render the cached records (all of them, or just *ids*) through
        the named output module's Renderer class."""
        self._check_index()
        if attachment_path is None: attachment_path=self.attachment_path
        # NOTE(review): imports 'citeulike.output.*' although this package
        # directory is named citeulike_api -- confirm the installed name.
        renderer_module = importlib.import_module(
          'citeulike.output.' + renderer_module)
        renderer = renderer_module.Renderer(
            attachment_path=attachment_path,
            *args, **kwargs)
        if ids is None:
            records = self.records
        else:
            records = tuple(self.with_ids(ids))
        return renderer.render(records)
        
    def with_ids(self, ids):
        """Yield the cached records whose article_id is in *ids*."""
        ids = set([unicode(i) for i in ids])
        for id_ in ids:
            yield self.article_lookup[id_]
                
    def download_json_index(self, json_cache=None):
        """load the index JSON, optionally caching it locally"""
        browser = self.browser
        self.wait_for_api_limit()
        content = browser.open(
          _all_json_t.substitute(username = self.username)
          ).read()
        # Apparently we get utf-8 bytes back and have to decode to avoid ascii
        # roundtripping
        # it would be nice to use a stream decoder for this, but codecs doesn't
        # like mechanize
        content = codecs.decode(content, 'utf-8')
        
        json_cache = json_cache or self.json_cache
        if json_cache:
            with codecs.open(json_cache, 'w', encoding='utf-8') as f:
                f.write(content)
        return json.loads(content)
        
    def load_json_from_cache(self, json_cache):
        """Load records from a JSON file written by download_json_index
        and rebuild the article_id lookup."""
        self.json_cache = json_cache
        with codecs.open(json_cache, 'r', encoding='utf-8') as f:
            self.records = json.load(f)
        self.article_id_index()
    
    def article_id_index(self):
        """(Re)build self.article_lookup, mapping article_id -> record."""
        article_lookup = {}
        for i in xrange(len(self.records)):
            article_lookup[self.records[i]['article_id']] = self.records[i]
        self.article_lookup = article_lookup
        
    def download_bibtex(self, username = None, dumpfile=None):
        """Download the CUL bibtex.
        This is highly likely to be malformed. You'd be better off using the
        bibtex_output methods"""
        browser = self.browser
        username = username or self.username
        # NOTE(review): unlike download_json_index, this never calls
        # .read(), so `content` is the mechanize response object; writing
        # it to `dumpfile` below presumably misbehaves -- confirm.
        content = browser.open(
          _all_bibtex_t.substitute(username = username)
        )
        if dumpfile:
            with codecs.open(dumpfile, 'w', encoding='utf-8') as f:
                f.write(content)
        return content
        
    def _check_index(self):
        """Raise CulError if no records have been loaded yet."""
        if not self.records:
            raise CulError("Empty record index. Have you run "
              "load_json_from_cache or cache_records?")
         
    def bulk_apply(self, filt=None, method_names=None, method_args=None):
        """Apply named methods of self to every cached record that passes
        *filt*, retrying each call up to 4 times with exponential backoff
        on IOError.

        filt -- predicate on a record dict (default: accept everything)
        method_names -- list of method-name strings (default: kill_cite_key)
        method_args -- extra keyword args passed to every method call
        """
        self._check_index()
        
        #what to do
        if not method_names:
            method_names=['kill_cite_key']
        methods = [getattr(self, mn) for mn in method_names]
        
        if not method_args:
            method_args = {}
        
        #what to do it to
        if filt is None:
            filt = lambda x: True
        
        for rec in self.records:
            if not filt(rec): continue
            print "Batching record %s, '%s'" % (
              rec['article_id'], rec['title'])
                  
            for fn, fn_name in zip(methods, method_names):
                print "applying %s" % fn_name
                for attempt in range(4):
                    try:
                        fn(int(rec['article_id']), **method_args)
                        break
                    except IOError, e:
                        print "network problem", unicode(e)
                        if hasattr(e, 'getcode'):
                            print "HTTP code", e.getcode()
                        # 0, 5, 15, 35 seconds; wait_for_api_limit floors
                        # this at MIN_API_WAIT
                        wait = 5*(2**attempt-1)
                        print "retrying in %d seconds" % wait
                        self.wait_for_api_limit(wait)
                        
        
    def go_to_article_form(self, article_id, fields=None):
        """point our invisible browser at the artcle change form"""
        browser = self.browser
        self.wait_for_api_limit()
        browser.open(self.get_edit_url(article_id))

        browser.select_form(
          predicate=lambda form: form.attrs.get('id')=='article'
        )
        return browser
    
    def submit_article_form(self):
        """Submit the currently-selected form; raise CulError unless CUL
        redirects back to a user page (its success behaviour)."""
        self.wait_for_api_limit()
        resp = self.browser.submit()
        if not resp.geturl().startswith('http://www.citeulike.org/user/'):
            raise CulError("some kind of form failure")
    
    #### Particular handy things i need to do often
    def kill_cite_key(self, article_id):
        """blank out the bibtex key in case you imported or copied the article
        from someone else who uses a convention you don't like.
        Less important now that CUL export multiple cite keys."""
        browser = self.go_to_article_form(article_id)
        browser.form['bibtex_import_cite'] = ""
        self.wait_for_api_limit()
        self.submit_article_form()
    
    def kill_braces(self, article_id, fields=None):
        """purge the braces that CUL wraps fields with sometimes"""
        if fields is None:
            fields = FIELDS_THAT_GET_SPURIOUS_BRACES
        print 'checking %s for braces' % article_id
        browser = self.go_to_article_form(article_id)
        # strip braces line-by-line, preserving the form's CRLF line ends
        for json_name, form_name in fields.iteritems():
            questionable_content = browser.form[form_name]
            better_content = '\r\n'.join([
               strip_wrapping_braces(line) for line in questionable_content.splitlines()
            ])
            browser.form[form_name] = better_content
            print '  updating %s' % json_name 
        self.wait_for_api_limit()
        self.submit_article_form()
    
    def fix_title_case(self, article_id):
        """Replace an all-caps title with titlecase'd text via the edit form."""
        from titlecase import titlecase
        # NOTE(review): the string below was presumably meant to be the
        # docstring; placed after the import it is a no-op statement.
        """PURGE SHOUT CASE TITLE!!!1!"""
        print 'updating %s for silly title caps' % article_id
        browser = self.go_to_article_form(article_id)
        questionable_content = browser.form['title']
        browser.form['title'] = titlecase(questionable_content)
        print '  updating "%s"' % questionable_content 
        self.wait_for_api_limit()
        self.submit_article_form()

    def download_attachments(self, article_id,
            attachment_path=None,
            force=False,
            sync_tags=True):
        """sync my pdfs etc to disk from a given article

        Downloads each of the article's userfiles unless a local copy with
        a matching SHA-1 already exists (or unconditionally when *force*),
        then optionally applies the article's tags to the local file.
        """
        browser = self.browser
        article_id = unicode(article_id)
        if attachment_path:
            attachment_path = os.path.realpath(attachment_path)
        else:
            attachment_path = self.attachment_path
        article_meta = self.article_lookup[article_id]
        attachments = article_meta.get('userfiles', [])
        for attachment in attachments:
            local_path = os.path.join(attachment_path, attachment["name"])
            print "inspecting local path %s" % os.path.abspath(local_path)
            # NOTE(review): remote_url is computed but never used;
            # remote_path below duplicates it
            remote_url = _attachment_t.substitute(path=attachment["path"])
            #if the force flag is on, let's DO it
            do_download = force
            # elsewise, check if we need to
            if not force:
                if not os.path.exists(local_path):
                    print "local path %s does not exist. downloading" % \
                      os.path.abspath(local_path)
                    do_download = True
                else: #Is this sane? overwrites local annotations
                    local_sha1 = sha1_digest(open(local_path, 'rb').read())
                    if attachment['sha1'] != local_sha1:
                        do_download = True
                        print "local path SHA %s does not equal remote SHA %s" \
                          " Downloading." % \
                          (local_sha1, attachment['sha1'])
                          
            if do_download:
                remote_path = _attachment_t.substitute(path=attachment['path'])
                self.wait_for_api_limit()
                content = browser.open(remote_path).read()
                with open(local_path, 'wb') as f:
                    f.write(content)
            else:
                print "...but no need to download this one"
            
            if sync_tags:
                self.tagger(local_path, article_meta.get('tags', []))
                
    def delete_unknown_attachments(self, attachment_path=None):
        """for now, this only handles orphan PDFs

        Trashes (rather than permanently deleting) every *.pdf in the
        attachment directory that no cached record claims as a userfile.
        """
        if attachment_path:
            attachment_path = os.path.realpath(attachment_path)
        else:
            attachment_path = self.attachment_path
        known_attachments = set()
        for record in self.records:
            for att in record.get('userfiles', []):
                known_attachments.add(
                  os.path.abspath(
                    os.path.join(
                      attachment_path, 
                      att['name']
                    )
                  )
                )
        all_attachments = set(glob.glob(
          os.path.join(attachment_path, '*.pdf')
        ))
        
        for unknown_attachment in (all_attachments-known_attachments):
            print "unknown attachment '%s'" % unknown_attachment
            trash(os.path.abspath(unknown_attachment))
+            
+                

File src/citeulike_api/output/__init__.py

Empty file added.

File src/citeulike_api/output/base.py

+# -*- coding: utf-8 -*-
+from jinja2 import Environment, PackageLoader
+# from jinja2 import Template
+# template = Template('Hello {{ name }}!')
+# >>> template.render(name='John Doe')
+# u'Hello John Doe!'
+
class BaseRenderer(object):
    """Shared machinery for template-driven renderers.

    Subclasses set a class-level ``template_name``; instances may
    override it via the constructor.
    """
    
    def __init__(self,
          template_name=None,
          env_args=None,
          *args, **kwargs):
        # a constructor-supplied name shadows the class attribute
        if template_name is not None:
            self.template_name = template_name
        env_args = env_args if env_args is not None else {}
        self.env = Environment(
          loader=PackageLoader('citeulike', 'templates',),
          **env_args)
    
    def get_template(self, template_name=None):
        """Look up *template_name* (default: self.template_name)."""
        name = template_name or self.template_name
        return self.env.get_template(name)
    
    def render(self, record_list, *args, **kwargs):
        """Render the default template against *record_list*."""
        template = self.get_template()
        return template.render(record_list=record_list, *args, **kwargs)

File src/citeulike_api/output/bibtex.py

# -*- coding: utf-8 -*-
from citeulike import citeulike_api
from citeulike.citeulike_api import strip_html_tags, strip_tex_nonsense, strip_wrapping_braces
import codecs
import os.path
from sphinx.util import texescape
# populate texescape's escape maps; tex_escape() below relies on
# texescape.tex_escape_map being initialised
texescape.init()
from Recode import Recodec
# codec that rewrites utf-8 text into LaTeX diacritic macros
# (used by tex_diacriticize below)
tex_codec = Recodec('utf8..ltex')
from collections import defaultdict
from base import BaseRenderer
+
class Renderer(BaseRenderer):
    """Bibtex renderer.

    Unlike the template-driven renderers, this serialises records
    directly via list2bib rather than through a Jinja template.
    """
    
    def __init__(self,
          escape_diacritics=False,
          escape_markup=True,
          attachment_path='.',
          *args, **kwargs):
        super(Renderer, self).__init__(*args, **kwargs)
        self.escape_diacritics = escape_diacritics
        self.escape_markup = escape_markup
        self.attachment_path = attachment_path
    
    def render(self, record_list):
        """Serialise *record_list* to a single bibtex string."""
        return list2bib(
          record_list,
          escape_diacritics=self.escape_diacritics,
          escape_markup=self.escape_markup,
          attachment_path=self.attachment_path)
+
def list2bib(arr, escape_diacritics=False,
            escape_markup=True,
            attachment_path='.'):
    """Convert a list of CUL JSON record dicts to one bibtex string.
    
    TODO:
      * sort consistently
      * avoid duplicate keys (Titmuss, for example)
      * handle multiple field-stripping logics (HTML-friendly, BibTeX-friendly)
      * handle multiple character encodings
    """
    entries = [
      dict2bib(record,
        escape_diacritics=escape_diacritics,
        escape_markup=escape_markup,
        attachment_path=attachment_path)
      for record in arr
    ]
    return '\n'.join(entries)
+
def dict2bib(obj,
        escape_diacritics=False,
        escape_markup=True,
        attachment_path='.'):
    """converts a single python dict to a bibtex
    entry

    Values pass through a filter chain (brace stripping, then optional
    diacritic and markup escaping) unless the field is in do_not_escape,
    and are delimited with "..." or {...} depending on their content.
    """
    lines = []
    obj = citeulike_api.clean_article(obj)
    key = citeulike_api.create_key(obj)
    lines.append('@%s{%s,' % (citeulike_api.CUL_FIELDS[obj['type']], key))
    # bibtex field name -> list of values (some fields repeat, e.g. Url)
    bib_fields = defaultdict(list)
    pages = ''
    # the value-filter chain is built up as a tuple of callables
    filters = strip_wrapping_braces,
    do_not_escape = set(['Local-Url', 'keywords', 'tags', 'doi'])
    if escape_diacritics:
        filters += tex_diacriticize,
    if escape_markup:
        filters += tex_escape,
    def filt(v):
        for f in filters:
            v = f(v)
        return v
    
    for k,v in obj.iteritems():

        #fields not requiring translation
        if k in ("title", "volume", "journal",
                "chapter", "issue", "citation",
                "institution", "organization",
                "booktitle", "series", "publisher",
                "location", "issn", "isbn",
                "address", "how_published",
                "edition", "school", "doi", "citation"):
            bib_fields[k].append(v)
        elif k in ('type', 'citation_keys',
                'username', 'posted_count',
                'article_id'): 
            pass #wilfully ignore dull fields
        elif k in ('notes', 'date', 
                'priority', 'rating', 'date_other'): 
            pass #too complicated or confusing for now
        elif k=='abstract':
            bib_fields['abstract'].append(
              strip_tex_nonsense(strip_html_tags(v)))
        elif k=="authors":
            bib_fields['author'].append( u" and ".join(v))
        elif k=="editors":
            bib_fields['editor'].append( u" and ".join(v))
        elif k=="start_page":
            # start/end pages may arrive in either order; accumulate into
            # `pages` around a '-' separator.
            # NOTE(review): bib_fields['pages'] is a *list* here; this
            # relies on it being empty (hence falsy) at this point.
            pages = v + (bib_fields['pages'] or '-')
        elif k=="end_page":
            pages = (pages or '-') + v
        elif k=="tags":
            # several tag-field conventions, for different consumers
            bib_fields['keywords'].append(u"; ".join(v))
            bib_fields['tags'].append(u", ".join(v))
            bib_fields['Tags'].append(u"; ".join(v))
        elif k=="published":
            # [year, month, day] list of strings; shorter lists are fine
            try:
                bib_fields['year'].append(v[0])
                bib_fields['month'].append(
                  citeulike_api.month_name[int(v[1])][:3])
                bib_fields['day'].append(v[2])
            except IndexError:
                pass
        elif k=='linkouts':
            for lo in v:
                bib_fields['Url'].append( lo['url'])
        elif k=='userfiles':
            for uf in v:                
                bib_fields['Local-Url'].append(
                  os.path.join(attachment_path, uf['name'])
                )
        elif k=='href':
            # the record's own CUL page goes first in the Url list
            if not v in bib_fields['Url']:
                bib_fields['Url'].insert(0, v)
        else:
            print u"unhandled field : %s: %s" % (k, unicode(v))
    if pages:
        bib_fields['pages'].append(pages)
    for k, v_list in bib_fields.iteritems():
        for v in v_list:
            if k not in do_not_escape:
                escaped_line = filt(v)
            else:
                escaped_line = v
            # pick delimiters: quote-wrap anything non-numeric or containing
            # braces; brace-wrap values that themselves contain quotes
            has_quotes, has_braces, has_non_numeric = False, False, False
            try:
                v = unicode(int(v))
            except ValueError:
                has_non_numeric = True
            if v.find('"')>=0:
                has_quotes = True
            if v.find('{')>=0:
                has_braces = True
            if has_braces or has_non_numeric:
                # str.join idiom: '"' + escaped_line + '"'
                escaped_line = escaped_line.join(['"', '"'])
            if has_quotes:
                escaped_line = escaped_line.join(['{', '}'])
            if has_quotes and has_braces:
                print "warning: field with quotes and braces:", escaped_line
            next_line = u'    %s = %s,' % (k, escaped_line)
            lines.append(next_line)
        
    lines.append(u'}')
    return u'\n'.join(lines)
+
def tex_escape(field):
    """
    convert unicode field into a TeX-macro'd unicode field with tricky chars
    escaped such that an ascii or utf-8 representation won't choke old
    BibTeX. (eg. <, {, \, " are cleaned up)
    
    We assume that there is no TeX markup already.
    
    A smart-type-guessing thingy might be necessary for, e.g. abstracts.
    See:
    http://www.astro.rug.nl/~kuijken/latex.html
    or
    http://www.math.uiuc.edu/~hildebr/tex/course/intro1.html
    for a guide to escaping
    """
    # texescape.init() at module import populated tex_escape_map
    return unicode(field).translate(texescape.tex_escape_map)
+
def tex_diacriticize(field):
    """take a unicode field and convert it to ASCII-safe using latex
    escaping (via the Recode 'utf8..ltex' codec; see test_bibtex_output
    for example input/output pairs)"""
    field = codecs.encode(field, 'utf-8')
    # Recodec.encode returns (translated_bytes, length_consumed)
    translated, length = tex_codec.encode(field, 'replace')
    return unicode(translated)
+

File src/citeulike_api/output/rst.py

+# -*- coding: utf-8 -*-
+from base import BaseRenderer
+from citeulike.citeulike_api import strip_wrapping_braces
+
+
class Renderer(BaseRenderer):
    """ReST renderer: drives the rst_plain template with extra filters."""
    
    template_name = "rst_plain.rst"
    
    def __init__(self,
          *args, **kwargs):
        super(Renderer, self).__init__(*args, **kwargs)
        # register the custom filters the rst_plain.rst template uses
        self.env.filters.update({
          'restifytitle': restifty_title,
          'stripbraces': strip_wrapping_braces,
        })
+        
def restifty_title(title):
    """Render *title* as a ReST top-level heading (matching '=' overline
    and underline), with two trailing newlines."""
    bar = "=" * len(title)
    return "\n".join([bar, title, bar, "", ""])

File src/citeulike_api/templates/bibtex_plain.bib

+{% for record in record_list %}
+@record.{{ record.title|stripbraces|restifytitle }}
+.. [{{record.citation_keys[1]}}] {{ record.title|stripbraces }}. {% if record.published %} {{ record.published.0 }}. {% endif %}{% if record.authors %} by {{ record.authors|join(', ') }}. {% endif %}{% if record.editors %} edited by {{ record.editors|join(', ') }}.{% endif %}
+  `View on citeulike <{{record.href}}>`_
+  
+{% endfor %}

File src/citeulike_api/templates/rst_plain.rst

+{% for record in record_list %}
+{{ record.title|stripbraces|restifytitle }}
+.. [{{record.citation_keys[1]}}] {{ record.title|stripbraces }}. {% if record.published %} {{ record.published.0 }}. {% endif %}{% if record.authors %} by {{ record.authors|join(', ') }}. {% endif %}{% if record.editors %} edited by {{ record.editors|join(', ') }}.{% endif %}
+  `View on citeulike <{{record.href}}>`_
+  
+{% endfor %}

File src/citeulike_api/test_bibtex_output.py

+#!/usr/bin/env python
+# encoding: utf-8
+"""
+test_citeulike.citeulike2bib.py
+
+Created by dan mackinlay on 2010-12-09.
+Copyright (c) 2010 __MyCompanyName__. All rights reserved.
+"""
+
+import nose
+from nose.tools import *
+from output.bibtex import tex_diacriticize
+import codecs
+
# run the nose collector when executed directly; it discovers the
# generator test defined below
if __name__ == '__main__':
    nose.main()
+
def test_bib_tex_diacritics():
    """Nose generator test: yields one (assert_equals, expected, actual)
    case per (source, expected-LaTeX) pair for tex_diacriticize."""
    source_dest = (
      # (u"field with {", "field with \{", "brace escaping"),
      (u"Swedenbørg", u"Swedenb\o{}rg"), 
      (u"jalapeño", u"jalape\~no"),
      (u"garçon",u"gar\c{c}on"),
      (u"Århus", u"\AA{}rhus"),
      (u"L'Hôpital", u"L'H\^opital"),
    )
    for src, dest in source_dest:
        yield assert_equals, dest, tex_diacriticize(src)

File src/citeulike_api/test_citeulikeapi.py

+#!/usr/bin/env python
+# encoding: utf-8
+"""
+test_citeulikeapi.py
+
+Created by dan mackinlay on 2010-12-10.
+Copyright (c) 2010 __MyCompanyName__. All rights reserved.
+"""
+
+import nose
+from nose.tools import *
+import citeulike_api
+
# run the nose collector when executed directly; it discovers the
# generator test defined below
if __name__ == '__main__':
    nose.main()
+
def test_keygeneration():
    """test author year title key generation

    Nose generator test: each tuple is (record, expected key, case name);
    the name is passed through as assert_equals' failure message.
    """
    source_dest = (
        ({
            "article_id": "330004",
            "title": "Information and Randomness : An Algorithmic Perspective",
            "published": ["2002"],
        }, "Anonymous2002Information", None),
        ({
            "article_id": "1296581",
            "title": "Draft Ecological Risk Assessment for the Effects of Fishing: South East Trawl and Danish Seine Fishery",
            "published": ["2004"],
            "editors": [ "Hobday", "Smith", "Stobutzki"],
        }, "Hobday2004Draft", None),
        ({
            "article_id": "1301808",
            "title": "Algorithmic information theory",
            "published": ["1987"],
            "authors": [ "Gregory J. Chaitin"],
        }, "Chaitin1987Algorithmic", None),
        ({
            "article_id": "606459",
            "username": "livingthingdan",
            "title": "The Evolution of Cooperation",
            "published": ["1985","10","01"],
            "authors": [ "Robert Axelrod"]
        },"Axelrod1985Evolution", "complex date")
    )
    for (src, dest, test_name) in source_dest:
        yield assert_equals, citeulike_api.create_key(src), dest, test_name

File src/citeulike_api/util.py

+#!/usr/bin/env python
+# encoding: utf-8
+"""
+util.py
+
+Created by dan mackinlay on 2010-12-15.
+Copyright (c) 2010 __MyCompanyName__. All rights reserved.
+"""
+import subprocess
+import shlex
+
def run_process(plain_old_command_string):
    """Run a shell-style command string and return its captured stdout."""
    argv = shlex.split(plain_old_command_string)
    process = subprocess.Popen(argv, stdout=subprocess.PIPE)
    stdout, _ = process.communicate()
    return stdout
+
def get_code_revision():
    """Return the node hash of the Mercurial tip revision of this checkout."""
    hg_command = "hg tip --template {node}"
    return run_process(hg_command)