# Source: pyinspire / pyinspire / pyinspire.py

'''
pyinspire

Author: Ian Huston

Released under the modified BSD license.
'''

import feedparser
import time
import datetime
import re

# Default base URL used by InspireClient when no apiurl is supplied; the query
# string passed to InspireClient.fetch is appended directly after "p=".
APIURL = "http://inspirehep.net/rss?ln=en&p="


def time_to_datetime(timetuple):
    """Convert a time tuple into a ``datetime.datetime`` object.

    The tuple is first turned into a timestamp with ``time.mktime`` and that
    timestamp is then converted with ``datetime.datetime.fromtimestamp``.
    """
    seconds = time.mktime(timetuple)
    return datetime.datetime.fromtimestamp(seconds)

def strip_newlines(s):
    """Collapse a multi-line string onto one line.

    Each line of ``s`` has its surrounding whitespace removed, and the
    resulting pieces are joined with single spaces.
    """
    pieces = []
    for line in s.splitlines():
        pieces.append(line.strip())
    return " ".join(pieces)

# Matches the trailing "/abs/<id>[v<version>]" part of an arXiv abstract URL.
# New-style ids are YYMM.NNNN or YYMM.NNNNN; old-style ids are
# <archive>/NNNNNNN. Raw string so the backslash escapes reach the regex
# engine untouched, and the dot is escaped so it only matches a literal ".".
_ARXIV_ABS_RE = re.compile(r"/abs/(\d{4}\.\d{4,5}|[a-z-]*/\d{7})v?(\d*)$")


def arxivid_from_url(url):
    """Return arxiv id and version number given arxiv url.

    Parameters
    ----------
    url : str
        URL ending in ``/abs/<id>`` optionally followed by ``v<number>``.

    Returns
    -------
    tuple
        ``(arxivid, version)`` where ``arxivid`` is a string and ``version``
        is an int; ``version`` is 0 when the url carries no version suffix.

    Raises
    ------
    AttributeError
        If ``url`` does not end in a recognised ``/abs/...`` path (the regex
        search returns ``None``).
    """
    match = _ARXIV_ABS_RE.search(url)
    arxivid, versionstring = match.groups()
    # Compare by value/truthiness: an identity test against '' is not reliable.
    version = int(versionstring) if versionstring else 0
    return arxivid, version


def entries_from_id_list(id_list):
    """Look up the entries matching a list of arxiv ids.

    The ids are comma-joined into an ``id_list=...`` query string, which is
    handed to ``get_inspire_entries``; its result is returned unchanged.
    """
    return get_inspire_entries("id_list=" + ",".join(id_list))

def get_inspire_entries(querystring):
    """Fetch the feed for ``querystring`` and return its usable entries.

    Returns a list of the feed entries that have a ``title``. The list is
    empty when the feed reports zero total results.

    Raises ``IOError`` when the feed consists of a single entry whose title
    is ``"Error"``; the entry's summary is included in the message.
    """
    client = InspireClient()
    response = client.fetch(querystring)
    found = response.entries
    total = int(response.feed.opensearch_totalresults)

    if total == 0:
        return []

    # A lone entry titled "Error" is how a failed query shows up in the feed
    if len(found) == 1 and "title" in found[0] and found[0].title == "Error":
        raise IOError("Error retrieving results from Inspire: "
                      + found[0].summary)

    # Drop entries that carry no title
    return [entry for entry in found if "title" in entry]

class InspireClient(object):
    """Feed client for the Inspire site that remembers when it last ran.

    The time of the most recent fetch is kept on the class itself, so the
    one-second spacing between requests applies across all instances.
    """
    # Timestamp (``time.time()``) of the most recent fetch; 0 before any fetch
    lasttime = 0

    def __init__(self, apiurl=None):
        """Store the base url, falling back to ``APIURL`` when none is given."""
        self.apiurl = apiurl if apiurl else APIURL

    def fetch(self, querystring):
        """Request ``apiurl + querystring`` and return the parsed feed."""
        # Leave a gap of at least one second since the previous call
        elapsed = time.time() - InspireClient.lasttime
        if elapsed < 1:
            time.sleep(1 - elapsed)

        parsed = feedparser.parse(self.apiurl + querystring)
        InspireClient.lasttime = time.time()
        return parsed