Commits

beno  committed dd83fb7

Changed how ISBNs are converted: rdfs:seeAlso links are now created only for suitably formed ISBNs

  • Participants
  • Parent commits ce96d25

Comments (0)

Files changed (1)

File jiscobib/bltonquads.py

                    [\.\s]*$                  # can optionally end with spaces and/or periods
                    """, re.U|re.VERBOSE)
 
+# This isn't complex enough to actually validate the ISBN, merely checks to see if it is
+# of a suitable form for use in a URI, such as the seeAlso ones.
+ISBN_P = re.compile(r"^[0-9]{9,12}[0-9X]$", re.I) # looks for 9 (isbn10) to 12 (isbn13) digits followed by the checksum 0,1... 9, X (10)
+
 # include foaf and bio
 
 head = u"""<?xml version="1.0" encoding="utf-8" ?>
     literal_value = literal_value.replace("-", "")
     if literal_value.lower().startswith("urn:isbn:"):
       _urn, _xisbn, isbn = literal_value.lower().split(":", 2)
-      lines.append(u'    <bibo:isbn rdf:resource="%s"/>' % literal_value.lower())
-      lines.append(u'    <rdfs:seeAlso rdf:resource="%s" />' % URIRef("http://purl.org/NET/book/isbn/%s#book" % isbn))
-      lines.append(u'    <rdfs:seeAlso rdf:resource="%s" />' % URIRef("http://www4.wiwiss.fu-berlin.de/bookmashup/books/%s" % isbn))
+      if isbn.strip():
+        isbn = isbn.strip()
+        isbn = isbn.replace(" ", "").replace("-", "")  # strip out whitespace and hyphens
+        lines.append(u'    <bibo:isbn>%s</bibo:isbn>' % literal_value.lower())  # ISBNs as literals, due to garbage in them
+        if ISBN_P.match(isbn) != None:   # check to make sure the literal can make a valid URI
+          lines.append(u'    <rdfs:seeAlso rdf:resource="%s" />' % URIRef("http://purl.org/NET/book/isbn/%s#book" % isbn))
+          lines.append(u'    <rdfs:seeAlso rdf:resource="%s" />' % URIRef("http://www4.wiwiss.fu-berlin.de/bookmashup/books/%s" % isbn))
     elif literal_value.lower().startswith("(uk)"):
       lines.append(u'    <ov:blid>%s</ov:blid>' % literal_value[4:])
     else: