"""pyinspire / pyinspire

Author: Ian Huston

Released under the modified BSD license.
"""

import feedparser
import time
import datetime
import re


def time_to_datetime(timetuple):
    """Convert a time tuple (interpreted as local time) into a datetime."""
    seconds = time.mktime(timetuple)
    return datetime.datetime.fromtimestamp(seconds)

def strip_newlines(s):
    """Collapse a multi-line string onto a single line.

    Every line has its surrounding whitespace removed, and the lines are
    then joined with a single space between them.
    """
    stripped = (line.strip() for line in s.splitlines())
    return " ".join(stripped)

def arxivid_from_url(url):
    """Return arxiv id and version number given arxiv url.

    The url must end in ``/abs/<id>``, optionally followed by ``v<N>``.
    ``<id>`` is either a new-style identifier (``YYMM.NNNN`` or
    ``YYMM.NNNNN``) or an old-style one (``archive/NNNNNNN``).

    Returns a tuple ``(arxivid, version)`` where ``arxivid`` is a string and
    ``version`` is an int; ``version`` is 0 when the url has no version
    suffix.

    Raises ValueError if no arxiv id can be found at the end of ``url``.
    """
    # Raw string so backslashes reach the regex engine untouched. The dot is
    # escaped so it matches only a literal '.', and 4-5 digits are accepted
    # after it so that five-digit sequence numbers are not split into a
    # four-digit id plus a bogus version.
    regex = re.compile(r"/abs/(\d{4}\.\d{4,5}|[a-z-]*/\d{7})v?(\d*)$")
    match = regex.search(url)
    if match is None:
        raise ValueError("No arxiv id found in url: %r" % (url,))
    arxivid, versionstring = match.groups()
    # An empty version group means the url carried no explicit version.
    version = int(versionstring) if versionstring else 0
    return arxivid, version

def entries_from_id_list(id_list):
    """Look up the entries for a list of arxiv ids.

    The ids are joined with commas into an ``id_list=`` query string, which
    is handed to get_inspire_entries; its result is returned unchanged.
    """
    query = "id_list=" + ",".join(id_list)
    return get_inspire_entries(query)

def get_inspire_entries(querystring):
    """Fetch the feed for a query string and return its usable entries.

    Entries lacking a "title" are dropped. When the feed reports zero total
    results an empty list is returned.

    Raises IOError if the only entry returned is titled "Error"; the
    entry's summary is included in the message.
    """
    client = InspireClient()
    response = client.fetch(querystring)
    found = response.entries
    total = int(response.feed.opensearch_totalresults)
    if total == 0:
        found = []
    if len(found) == 1:
        only = found[0]
        # A lone entry titled "Error" is how a failure is reported.
        if "title" in only and only.title == "Error":
            raise IOError("Error retrieving results from Inspire: "
                          + only.summary)
    # Get rid of empty entries (this also covers a lone untitled entry).
    return [item for item in found if "title" in item]

class InspireClient(object):
    """Client to access Inspire site which stores last run time.

    The time of the most recent request is kept on the class itself, so the
    one-second minimum spacing between calls is shared by all instances.
    """
    # Time (seconds since the epoch) at which the last request returned.
    lasttime = 0

    def __init__(self, apiurl=None):
        """Create a client.

        apiurl -- base url that query strings are appended to. The
                  module-level APIURL is used when it is omitted or empty.
        """
        # Only fall back to the default when no url was supplied; an
        # explicitly given url must not be overwritten (and vice versa).
        self.apiurl = apiurl or APIURL

    def fetch(self, querystring):
        """Call the api site with the querystring and return result.

        The request url is ``self.apiurl + querystring``; the value
        returned is whatever ``feedparser.parse`` produces for that url.
        """
        # Wait for at least one second between calls
        timediff = time.time() - InspireClient.lasttime
        if timediff < 1:
            time.sleep(1 - timediff)
        fullurl = self.apiurl + querystring
        result = feedparser.parse(fullurl)
        # Record the time on the class so every instance sees it.
        InspireClient.lasttime = time.time()
        return result