"""pyinspire: command-line access to the INSPIRE HEP database.

Author: Ian Huston

Released under the modified BSD license.
"""
import sys
import urllib
from bs4 import BeautifulSoup
import feedparser
import time
import datetime
import re
import optparse
import logging

# Base URL to which query strings are appended when contacting INSPIRE.
# NOTE(review): the value is empty here, so any URL built from it has no
# host part -- confirm the intended endpoint before relying on it.
APIURL = "" 
# Module-level logger used for debug and error messages in this module.
log = logging.getLogger("pyinspire")
def time_to_datetime(timetuple):
    """Convert a local-time tuple (or struct_time) into a datetime object.

    The tuple is first turned into seconds since the epoch with
    ``time.mktime`` and then into a naive local ``datetime.datetime``.
    """
    seconds_since_epoch = time.mktime(timetuple)
    return datetime.datetime.fromtimestamp(seconds_since_epoch)

def strip_newlines(s):
    """Join the lines of ``s`` into a single space-separated line.

    Each line has its leading and trailing whitespace removed before the
    lines are joined, so an empty line contributes an empty piece.
    """
    pieces = []
    for line in s.splitlines():
        pieces.append(line.strip())
    return " ".join(pieces)

def arxivid_from_url(url):
    """Return arxiv id and version number given arxiv url.

    url : string
          address whose tail has the form ``/abs/<id>`` optionally followed
          by ``v<version>``. Both new-style ids (``0704.0001``,
          ``1501.00001``) and old-style ids (``hep-th/9901001``) are
          recognised.

    Returns a tuple ``(arxivid, version)`` where ``version`` is an int and
    is 0 when the url carries no version suffix.

    Raises ValueError if no arxiv id can be found at the end of ``url``.
    """
    # Raw string with an escaped dot so only a literal "." separates the two
    # halves of a new-style id; 4 or 5 trailing digits covers ids issued
    # both before and after 2015.
    regex = re.compile(r"/abs/(\d{4}\.\d{4,5}|[a-z-]*/\d{7})v?(\d*)$")
    r = regex.search(url)
    if r is None:
        raise ValueError("Could not find an arXiv id in url: %s" % url)
    arxivid, versionstring = r.groups()
    # An empty version group means the url had no "vN" suffix.
    if versionstring == '':
        version = 0
    else:
        version = int(versionstring)
    return arxivid, version

def entries_from_id_list(id_list):
    """Look up the entries belonging to a list of arxiv ids.

    The ids are joined with commas into an ``id_list=`` query string which
    is handed to ``get_inspire_entries``; its result is returned unchanged.
    """
    return get_inspire_entries("id_list=" + ",".join(id_list))

def get_inspire_entries(querystring):
    """Fetch the feed for ``querystring`` and return its usable entries.

    An empty list is returned when the feed reports zero total results.
    Entries lacking a "title" are discarded. If the feed consists of a
    single entry titled "Error", an IOError carrying that entry's summary
    is raised instead.
    """
    feed = InspireClient().fetch(querystring)
    found = feed.entries
    if int(feed.feed.opensearch_totalresults) == 0:
        found = []
    if len(found) == 1:
        only = found[0]
        if "title" in only and only.title == "Error":
            raise IOError("Error retrieving results from Inspire: "
                          + only.summary)
    # Drop entries without a title (this also covers a lone empty entry).
    return [entry for entry in found if "title" in entry]

class InspireClient(object):
    """Client to access Inspire site which stores last run time.

    The time of the most recent request is kept on the class, so the
    one-second pause between calls applies across all instances.
    """
    # Timestamp (seconds since the epoch) of the last completed fetch.
    lasttime = 0

    def __init__(self, apiurl=None):
        """Create a client.

        apiurl : string or None
                 base URL that query strings are appended to; when empty
                 or None the module-level APIURL is used instead.
        """
        if not apiurl:
            self.apiurl = APIURL
        else:
            self.apiurl = apiurl

    def fetch(self, querystring):
        """Call the api site with the querystring and return result.

        querystring : string
                      appended verbatim to the client's base URL.

        Returns whatever ``feedparser.parse`` produces for the full URL.
        """
        #Wait for at least one second between calls
        timediff = time.time() - InspireClient.lasttime
        if timediff < 1:
            time.sleep(1 - timediff)
        fullurl = self.apiurl + querystring
        result = feedparser.parse(fullurl)
        # Record completion time on the class so every instance sees it.
        InspireClient.lasttime = time.time()
        return result

def query_inspire(search="", bibtex=False):
    """Query the INSPIRE HEP database and return the parsed result page.

    search : string
             search string to use in query

    bibtex : boolean
             if True output is in bibtex format (citation info in comments)

    Returns the BeautifulSoup document built from the response. For the
    default (non-bibtex) format the list nested in each record body's
    ``<small>`` element is blanked out before the document is returned.

    Raises IOError if the page cannot be retrieved and ValueError if the
    numbers of "record_body" and "moreinfo" blocks differ.
    """
    inspireoptions = dict(action_search="Search",
                          rg=100, #number of results to return in one page
                          of="hb", #brief format by default
                          ln="en", #language
                          p="" # search string
                          )
    if bibtex:
        inspireoptions["of"] = "hx"
    inspireoptions["p"] = search

    url = APIURL + urllib.urlencode(inspireoptions)
    log.debug("Query URL is %s", str(url))
    try:
        f = urllib.urlopen(url)
        try:
            data = f.read()
        finally:
            # Always release the connection, even if reading fails.
            f.close()
    except IOError as e:
        log.error("Error retrieving results: %s", str(e))
        # Re-raise: without data there is nothing to parse below.
        raise
    soup = BeautifulSoup(data)
    if not bibtex:
        mainbodies = soup.find_all("div", {"class":"record_body"})
        moreinfos = soup.find_all("div", {"class":"moreinfo"})
        if len(mainbodies) != len(moreinfos):
            raise ValueError("Number of records is inconsistent.")
        for body in mainbodies:
            # Skip record bodies that lack the nested <small><ul> list
            # instead of failing on a missing tag.
            if body.small is not None and body.small.ul is not None:
                body.small.ul.replaceWith("")
    return soup

def main(argv=None):
    """ Main method to deal with command line arguments.

    argv : list of strings or None
           full argument vector including the program name; sys.argv is
           used when empty or None.

    Returns 0 when the query succeeds and 1 when it raises an exception.
    """
    if not argv:
        argv = sys.argv
    #Parse command line options
    parser = optparse.OptionParser()
    parser.add_option("-s", "--search", action="store", dest="search",
                      metavar="STRING", help="search string to send to INSPIRE")
    parser.add_option("-b", "--bibtex",
                  action="store_true", dest="bibtex", default=False,
                  help="output bibtex for entries")
    parser.add_option("-v", "--verbose",
                  action="store_const", const=logging.INFO, dest="loglevel",
                  help="print informative messages", default=logging.INFO)
    parser.add_option("-d", "--debug",
                  action="store_const", const=logging.DEBUG, dest="loglevel",
                  help="log lots of debugging information")
    (options, args) = parser.parse_args(args=argv[1:])
    # Apply the requested level; otherwise -v/-d would have no effect.
    logging.basicConfig(level=options.loglevel)
    log.debug("pyinspire called with the following options:\n %s", str(options))

    try:
        # Fall back to an empty search so None is never sent as the query.
        # NOTE(review): the returned document is not output anywhere yet.
        query_inspire(options.search or "", options.bibtex)
    except Exception as e:
        log.error("Error during retrieval of results: %s", str(e))
        return 1
    return 0

if __name__ == "__main__":