Commits

beroe  committed 4cd21df

added minimal crossref parser

  • Participants
  • Parent commits e0ef806

Comments (0)

Files changed (1)

File database/crossref_parse.py

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+import subprocess
+import sys
+import urllib
+
+RefList = """
+Brown, R. M., Ferner, J. W. & Diesmos, A. C. Herpetologica 53, 357–373 (1997).
+Webb, R. G. Herpetologica 34, 422–425 (1978).
+Inger, R. F. Fieldiana Zool. 33, 183–531 (1954).
+Savage, J. M. The Amphibians and Reptiles of Costa Rica (Univ. Chicago. Press, 2002).
+Merrill, E. D. Science 101, 401 (1945).
+Diesmos, A. C., Brown, R. M. & Gee, G. V. A. Sylvatrop 13, 63–80 (2003).
+Taylor, E. H., Leonard, A. B., Smith, H. M. & Pisani, G. R. Monogr. Mus. Nat. Hist. Univ. Kansas 4, 1–160 (1975).
+Taylor, E. H. The Caecilians of the World (Univ. Kansas Press, 1968).
+Brown, R. M. et al. Check List 8, 469–490 (2012).
+Brown, R. M., Siler, C. D., Diesmos, A. C. & Alcala, A. C. Herpetol. Monogr. 23, 1–44 (2009).
+"""
+URL_Template = 'curl -s "http://www.crossref.org/openurl/?title={0}&date={1}&volume={2}&spage={3}&pid=demo@practicalcomputing.org&redirect=false&format=unixref"'
+
+Dashes = r"\xe2\x80\x93" # weird unicode dash from web copy/paste...
+RemoveAuthors = r"^.*([A-Z]\. |al. )"
+GrabYear = r" \((\d+)\)\.?"
+Volume_Pages = u"(\d+), (\d+)-?\d*$"
+
+
+DEBUG = False
+
+for Line in RefList.split("\n"):
+	print "#"*50
+	Line = re.sub(Dashes,"-",Line.rstrip())
+	if DEBUG: print >> sys.stderr, Line
+	Line = re.sub(RemoveAuthors,"",Line)
+	if DEBUG: print >> sys.stderr, Line
+	YearGr = re.search(GrabYear,Line)
+	if YearGr:
+		Year = YearGr.group(1)
+		if DEBUG: print >> sys.stderr, Year
+		Line = re.sub(GrabYear,"",Line)
+		if DEBUG: print >> sys.stderr, "LINE:",Line
+		VolumeGr = re.search(Volume_Pages,Line)
+		if VolumeGr:
+			Volume,StartPg = VolumeGr.group(1,2)
+			if DEBUG: print >> sys.stderr, Volume,StartPg
+			Journal = re.sub(Volume_Pages,"",Line).strip().replace(" ","%20")
+			if DEBUG: print >> sys.stderr, Journal
+			Query = URL_Template.format(Journal,Year,Volume,StartPg)
+			if DEBUG: print >> sys.stderr, Query
+			RefString = subprocess.check_output(Query, stderr=subprocess.STDOUT,shell=True) 
+			print Query
+			print RefString