Commits

ipe committed 4f6bedd

Added magazine, book, place, and publisher controllers. Data added to the RDF endpoints (XML and JSON still in development). View and resource actions added to the controllers. main.py renamed to command.py. RDF parsing added to create the places index.

  • Parent commits 923def3


Files changed (36)

File openletters/cli.py

     db create
     db clean
     db rebuild # clean and create
-    # db upgrade [{version no.}] # Data migrate
+    # db upgrade [{version no.}] # Data migrate 
     '''
     summary = __doc__.split('\n')[0]
     usage = __doc__
         cmd = self.args[0]
         if cmd == 'dickens':
             fileobj = 'openletters/docs/dickens_letters.xml'
-            file_obj = 'openletters/docs/dickens_source.xml'
-            import openletters.main
-            openletters.main.load_dickens_letters(fileobj)
-            
-            openletters.main.load_source(file_obj)
+            source_obj = 'openletters/docs/dickens_source.xml'
+            book_obj = 'openletters/docs/dickens_texts.xml'
+            import openletters.command
+            openletters.command.load_dickens_letters(fileobj)
+            openletters.command.load_source(source_obj)
+            openletters.command.load_texts(book_obj)
 
         else:
             print 'Action not recognized'
 
 
 class Index(BaseCommand):
-    '''Index the letters for a Xapian powered search
+    '''
+       Index the letters for a Xapian powered search
     
-    index dickens  - indexes the Dickens letters
+       index dickens  - indexes the Dickens letters
     '''
     summary = __doc__.split('\n')[0]
     usage = __doc__
         if cmd == 'dickens':
             type = 'dickens'
             fileobj = 'openletters/docs/dickens_letters.xml'
-            import openletters.main
+            import openletters.command
             
-            openletters.main.index_letters(self, type, fileobj)
+            openletters.command.index_letters(self, type, fileobj)
         else:
             print 'Action not recognized'
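
For reference, a minimal sketch of what the reworked load command does for the `dickens` argument, using the three file paths hard-coded in cli.py above (main.py is now openletters/command.py). It assumes the application's database is already configured:

    import openletters.command as command

    letters_xml = 'openletters/docs/dickens_letters.xml'
    sources_xml = 'openletters/docs/dickens_source.xml'
    books_xml   = 'openletters/docs/dickens_texts.xml'

    command.load_dickens_letters(letters_xml)  # model.Letter rows
    command.load_source(sources_xml)           # model.Source rows
    command.load_texts(books_xml)              # model.Book rows (new in this commit)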

File openletters/command.py

+# -*- coding: latin-1 -*-
+'''
+Class to parse the Dickens letters and enter into a store
+'''
+import unicodedata
+from xml.dom import minidom
+import urllib, os
+
+from openletters.parse import parse_text, parse_date
+from openletters import model
+
+def getText(nodelist):
+    rc = []
+    for node in nodelist:
+        if node.nodeType == node.TEXT_NODE:
+            rc.append(unicodedata.normalize('NFKC', node.data))
+    return ''.join(rc)
+
+def handle_elements (elementname, element):
+    e = element.getElementsByTagName(elementname)
+    
+    for name in e:
+        return handle_parts(elementname, name)
+
+    
+def handle_parts (nodename, node):
+    return getText(node.childNodes)
+    
+
+def load_dickens_letters(fileobj, verbose=True):
+    #read = fileobj.read()
+    text = minidom.parse(fileobj)
+
+    #split the body into individual letters
+    letters  = text.getElementsByTagName('div')
+ 
+    vol = 1
+    count = 1
+    for letter in letters:
+        modelletter = model.Letter(
+                    volume=handle_elements("volume", letter), 
+                    type=u'dickens',
+                    correspondent = handle_elements("correspondent", letter), 
+                    salutation=unicode(handle_elements("salutation", letter)),
+                    letter_text=unicode(handle_elements("letter", letter)),
+                    letter_date=unicode(handle_elements("date", letter))
+                    )
+        print "date", unicode(handle_elements("date", letter))
+        model.Session.add(modelletter)
+        model.Session.commit()
+    
+        if verbose:
+            print('Letter %s: \n\t ...' % (count))
+            model.Session.remove()
+        else:
+            print('Letter %s: SKIPPING' % (count))
+
+def load_source (fileobj, verbose=True):
+    
+    source_text = minidom.parse(fileobj)
+    
+    letters  = source_text.getElementsByTagName('source')
+    title = ''
+    for letter in letters:
+        modelsource = model.Source (
+               source_id=unicode(handle_elements("id", letter)),   
+               title=unicode(handle_elements("title", letter)), 
+               author=unicode(handle_elements("author", letter)),   
+               publn_data=unicode(handle_elements("publication", letter)),
+               publn_date=unicode(handle_elements("date", letter)), 
+               s_url=unicode(handle_elements("url", letter)),                 
+            )
+        
+        model.Session.add(modelsource)
+        model.Session.commit()
+    
+        if verbose:
+            print('Source %s: \n\t ...' % (title))
+            model.Session.remove()
+        else:
+            print('Source : SKIPPING')
+            
+def load_texts (fileobj, verbose=True):
+    
+    source_text = minidom.parse(fileobj)
+    
+    letters  = source_text.getElementsByTagName('book')
+    title = ''
+    for letter in letters:
+        modelbook = model.Book (
+               book_id=unicode(handle_elements("id", letter)),   
+               book_title=unicode(handle_elements("title", letter)),
+               book_pub=unicode(handle_elements("mag_start", letter)),
+               book_end_pub=unicode(handle_elements("mag_end", letter)),  
+               aka=unicode(handle_elements("aka", letter)),
+               aka2=unicode(handle_elements("aka2", letter)),
+               description=unicode(handle_elements("description", letter)),
+               url=unicode(handle_elements("url", letter)),
+               source=unicode(handle_elements("source", letter)),
+            )
+        
+        model.Session.add(modelbook)
+        model.Session.commit()
+    
+        if verbose:
+            print('Source %s: \n\t ...' % (title))
+            model.Session.remove()
+        else:
+            print('Source : SKIPPING')
+
+
+def index_letters(self, type, fileobj):
+    import xapian
+
+    db_path = 'db'
+    
+    #database = xapian.WritableDatabase(db_path, xapian.DB_CREATE_OR_OPEN)
+    #open a writable database on the xapian-tcpsrvr
+    database = xapian.remote_open_writable("localhost/correspondence", 33333)
+    indexer = xapian.TermGenerator()
+    indexer.set_stemmer(xapian.Stem('english'))
+    
+    xapian_file_name = 0
+    count = 0
+    text = minidom.parse(fileobj)
+    #split the body into individual letters
+    letters  = text.getElementsByTagName('div')
+    #open the XML, parse the letter id
+    for letter in letters:
+        count +=1
+        text=unicode(handle_elements("letter", letter))
+        corr=unicode(handle_elements("correspondent", letter))
+            
+        document = xapian.Document()
+        document.set_data(text)
+        #not sure this is going to work - rather than using the filename, use letter ids
+        letter_index = type + "/" + urllib.quote(corr) + "/" + str(count)
+
+        print "indexing %s" ,letter_index
+        document.add_value(xapian_file_name, letter_index)
+        
+        indexer.set_document(document)
+        indexer.index_text(text)
+        database.add_document(document)
+        
+    database.flush()
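
The loaders above all lean on handle_elements()/getText(), which return the text of the first matching child element, or None when the element is absent (which unicode() then turns into the string u'None'). A self-contained illustration, using a made-up <book> fragment shaped like dickens_texts.xml:

    from xml.dom import minidom
    import unicodedata

    def getText(nodelist):
        rc = []
        for node in nodelist:
            if node.nodeType == node.TEXT_NODE:
                rc.append(unicodedata.normalize('NFKC', node.data))
        return ''.join(rc)

    def handle_elements(elementname, element):
        # text of the first matching child element, or None if it is missing
        for name in element.getElementsByTagName(elementname):
            return getText(name.childNodes)

    doc = minidom.parseString(
        "<book><id>6</id><title>A Christmas Carol</title><source>46</source></book>")
    book = doc.getElementsByTagName('book')[0]
    print handle_elements('title', book)         # -> A Christmas Carol
    print handle_elements('aka', book)           # -> None (element not present)
    print unicode(handle_elements('aka', book))  # prints 'None' - missing elements are stored as u'None'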

File openletters/config/routing.py

     map.connect('/{controller}/{action}/{author}/{correspondent}')
     map.connect('/{controller}/{action}/{author}/{correspondent}/')
     map.connect('/{controller}/{action}/{author}/{correspondent}/{id}')
+    map.connect('/{controller}/{action}/{author}/{correspondent}/{id}/{type}')
 
     return map
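
The extra six-segment route is what feeds the new resource actions their type suffix (rdf, xml or json). A quick sketch using the Routes Mapper directly, assuming the routes package is available; the URL and correspondent name are illustrative:

    from routes import Mapper

    map = Mapper()
    map.connect('/{controller}/{action}/{author}/{correspondent}/{id}/{type}')
    map.create_regs(['letters'])   # normally done for you by Pylons

    print map.match('/letters/resource/dickens/John%20Forster/12/rdf')
    # {'controller': 'letters', 'action': 'resource', 'author': 'dickens',
    #  'correspondent': 'John%20Forster', 'id': '12', 'type': 'rdf'}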

File openletters/controllers/author.py

+import logging, urllib
+
+from pylons import request, response, session, tmpl_context as c
+from pylons.controllers.util import abort, redirect_to
+
+from openletters.lib.base import BaseController, render
+
+from openletters.transform.transform_json import json_transform
+from openletters.transform.transform_xml import xml_transform
+from openletters.transform.transform_rdf import rdf_transform
+
+log = logging.getLogger(__name__)
+
+class AuthorController(BaseController):
+ 
+    def index (self):
+        c.author = "Charles Dickens"
+        return render('letters/authorindex.html')
+        
+    '''
+       Action to return the author and type. If no author, return the index
+    '''
+    def view (self, author=None):
+        c.author = "Charles Dickens"
+        if author is None: 
+            return render('letters/authorindex.html')
+        else:
+            c.author = u"Charles Dickens"
+            c.born = u"7 February 1812"
+            c.died = u"9 June 1870"
+            c.abstract = u"Charles John Huffam Dickens, pen-name 'Boz', was the most popular English novelist of the Victorian era, and one of the most popular of all time, responsible for some of English literature's most iconic characters. Many of his novels, with their recurrent theme of social reform, first appeared in periodicals and magazines in serialised form, a popular format for fiction at the time. Unlike other authors who completed entire novels before serial production began, Dickens often wrote them while they were being serialized, creating them in the order in which they were meant to appear. The practice lent his stories a particular rhythm, punctuated by one 'cliffhanger' after another to keep the public looking forward to the next installment. The continuing popularity of his novels and short stories is such that they have never gone out of print. His work has been praised for its mastery of prose and unique personalities by writers such as George Gissing and G. K. Chesterton, though the same characteristics prompted others, such as Henry James and Virginia Woolf, to criticize him for sentimentality and implausibility."
+            c.author_url = u"http://en.wikipedia.org/wiki/Charles_Dickens"
+            return render('letters/author.html')
+        
+    def resource (self, author=None, correspondent=None):
+        
+        if author is None:
+            abort(404)
+        else:
+            title = str(urllib.unquote(author))
+            if correspondent == "rdf":
+                response.headers['Content-Type'] = 'text/xml; charset=utf-8'
+                rdf = rdf_transform()
+                return rdf.create_author(title)

File openletters/controllers/book.py

+import logging
+
+from pylons import request, response, session, tmpl_context as c
+from pylons.controllers.util import abort, redirect_to
+
+from openletters.lib.base import BaseController, render
+
+from openletters import model
+
+from openletters.transform.transform_json import json_transform
+from openletters.transform.transform_xml import xml_transform
+from openletters.transform.transform_rdf import rdf_transform
+
+log = logging.getLogger(__name__)
+
+class BookController(BaseController):
+
+    def index(self):
+        # Return a rendered template
+        #return render('/book.mako')
+        # or, return a response
+        c.titles = model.Session.query(model.Book).all()
+
+        return render('letters/magazineindex.html')
+    
+    def view (self, author=None):
+        
+        if author is None:
+            abort(404)
+        
+        else:
+            c.books = model.Session.query(model.Book).filter(model.Book.url == author)
+            return render('letters/book.html')
+        
+    def resource (self, author=None, correspondent=None):
+        
+        if author is None:
+            abort(404)
+            
+        if correspondent is None:
+            return redirect_to(controller='book', action='view', author=author)
+        
+        else:
+            books = model.Session.query(model.Book).filter(model.Book.url == author)
+            if correspondent == "rdf":
+                response.headers['Content-Type'] = 'text/xml'
+                rdf = rdf_transform()
+                return rdf.create_publication(author, "book")

File openletters/controllers/correspondent.py

+import logging, urllib
+
+from pylons import request, response, session, tmpl_context as c
+from pylons.controllers.util import abort, redirect_to
+
+from openletters.lib.base import BaseController, render
+
+from openletters.transform.transform_rdf import rdf_transform
+from openletters.transform.transform_xml import xml_transform
+from openletters.transform.transform_json import json_transform
+
+from openletters.model import dbase
+
+log = logging.getLogger(__name__)
+
+class CorrespondentController(BaseController):
+
+    def index(self):
+        c.page_title = urllib.unquote(author)
+        c.author = author
+        c.nicks = self.corr_dict(author)
+        
+        return render('letters/correspondent.html')
+    
+    '''
+      Method to return details about a correspondent
+    '''
+    def view(self, author=None):
+        
+        c.page_title = urllib.unquote(author)
+        c.author = author
+        c.nicks = self.corr_dict(author)
+        
+        return render('letters/correspondent.html')
+    
+    '''
+      Method to return details about a correspondent
+    '''
+    def resource(self, author=None, correspondent=None):
+        
+        if author is None:
+            abort(404)
+
+        if correspondent == "rdf" or correspondent is None:
+            response.headers['content-type'] = 'text/xml; charset=utf-8'
+            rdf = rdf_transform()
+            return rdf.create_correspondent(author, self.corr_dict(author))
+        
+        elif correspondent == "xml":
+            response.headers['content-type'] = 'text/xml; charset=utf-8'
+            xml = xml_transform()
+            return xml.corres_xml(author, self.corr_dict(author))
+        
+        elif correspondent == "json":
+            response.headers['content-type'] = 'application/json;'
+            json = json_transform()
+            return json.corr_json(author, self.corr_dict(author))
+
+    
+    def corr_dict(self, corr):
+        
+        letter = {}  
+        letter = dbase.get_correspondent(corr)
+    
+        letter_items = letter.items()
+        letter_items.sort()
+        
+        return letter_items

File openletters/controllers/data.py

     '''
     def endpoint (self, author = '', correspondent = ''):
         
-            
         if author == "rdf":
-            response.headers['Content-Type'] = 'application/rdf+xml; charset=utf-8'
+            response.headers['Content-Type'] = 'application/rdf+xml'
             rdf = rdf_transform()
             return rdf.create_rdf_end()
         
         query_string = model.Session.query(model.Letter).filter(model.Letter.type == author).all()
         return json.book_json(query_string)
     
-    def correspondent(self):
-        req = request.POST('search')
-        corres =  model.Session.query(model.Letter.correspondent).distinct().all()
+    '''
+       Method to create correspondent rdf
+       '''
+    def correspondent(self,author=None, correspondent=None):
         
-        for c in corres:
-            if req in c:
-                b = '<li>%s</li>' % c
-        
-        return b    
+        if correspondent == "rdf":
+            response.headers['Content-Type'] = 'application/rdf+xml'
+            rdf = rdf_transform()
+            return rdf.create_correspondent(author)
+        if correspondent == "xml":
+            response.headers['Content-Type'] = 'text/xml'
+            xml = xml_transform()
+            return xml.corres_xml(author)       
     
-        
+        if correspondent == "json":
+            response.headers['Content-Type'] = 'application/json'
+            json = json_transform()
+            return json.corr_json(author)
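
The data controller now hands back RDF, XML or JSON based on a path segment rather than a POSTed search term. A hedged sketch of pulling the full RDF endpoint (the same URL sparql_funcs.py reads below), assuming the Pylons app is serving on localhost:5000:

    import urllib2

    resp = urllib2.urlopen('http://localhost:5000/data/endpoint/rdf')
    print resp.info().gettype()          # expected: application/rdf+xml
    rdf_xml = resp.read()
    print '%d bytes of RDF' % len(rdf_xml)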

File openletters/controllers/letters.py

                     c.type = model.Session.query(model.Source).get(letter.volume)
                     
                 return render('letters/view.html')
+    '''
+        Method to return a resource view of the letter
+    '''  
+    def resource (self, author=None, correspondent=None, id=None, type=None):
         
-    
-    '''
-      Method to return details about a correspondent
-    '''
-    def correspondent(self, author=None):
-        
+        #format = request.headers.get('accept','')
+
+        #author is the base collection so cannot be empty
         if author is None:
             abort(404)
+          
+        query_string = model.Session.query(model.Letter).filter(model.Letter.type == author).all()
+
+        
+        if correspondent is not None:
+            corr = urllib.unquote(correspondent)
+            query_string = model.Session.query(model.Letter).filter(model.Letter.type == author).filter(model.Letter.correspondent == corr).all()
+        
+        if id is not None:
+            query_string = model.Session.query(model.Letter).filter(model.Letter.type == author).filter(model.Letter.correspondent == corr).filter(model.Letter.id == id).all()
             
-        format = request.headers.get('accept','')
-        #format = "application/xml"
-        #response.headers['content-type'] = 'text/xml; charset=utf-8'
+        if query_string is None or query_string == []:
+            abort(404)
+        
+        if type == "json":
+            
+            response.headers['Content-Type'] = 'application/json'
+            json = json_transform()
+            return json.to_dict(query_string, id)
+        
+        elif type == "xml":
+            response.headers['Content-Type'] = 'text/xml'
+            xml = xml_transform()
+            
+            if id is None:
+                return xml.index_xml(query_string)
+            else:
+                return xml.letter_xml(query_string)
+       
+        elif type == "rdf":
+            response.headers['Content-Type'] = 'text/xml'
+            rdf = rdf_transform()
+            return rdf.create_rdf_letter(query_string)
 
-        if format == "application/rdf+xml":
-            response.headers['content-type'] = 'application/rdf + xml; charset=utf-8'
-            rdf = rdf_transform()
-            return rdf.create_correspondent(author, self.corr_dict(author))
-        
-        elif format == "application/xml":
-            response.headers['content-type'] = 'text/xml; charset=utf-8'
-            xml = xml_transform()
-            return xml.corres_xml(author, self.corr_dict(author))
-        
-        elif format == "application/json":
-            response.headers['content-type'] = 'application/json;'
-            json = json_transform()
-            return json.corr_json(author, self.corr_dict(author))
-        else:
-            c.page_title = urllib.unquote(author)
-            c.author = author
-            c.nicks = self.corr_dict(author)
  
-            return render('letters/correspondent.html')
-            
-        return corr_rdf
 
     def corr_dict(self, corr):
         

File openletters/controllers/magazine.py

+import logging, urllib
+
+from pylons import request, response, session, tmpl_context as c
+from pylons.controllers.util import abort, redirect_to
+
+from openletters.lib.base import BaseController, render
+
+from openletters.transform.transform_json import json_transform
+from openletters.transform.transform_xml import xml_transform
+from openletters.transform.transform_rdf import rdf_transform
+
+log = logging.getLogger(__name__)
+
+class MagazineController(BaseController):
+
+    def index(self):
+        c.magazines = {}
+        c.magazines = ["Household Works", "All the Year Round"]
+        return render('letters/magazineindex.html')
+    
+    '''
+       Action to return magazine details in html. resource controller = linked data
+       Todo: SPARQL query against dbpedia
+    '''
+    def view (self, author=None):
+        
+        if author is None:
+            c.magazines = {}
+            c.magazines = ["Household Works", "All the Year Round"]
+            return render('letters/magazineindex.html')
+        else:
+            mag = urllib.unquote(author)
+            if mag =="Household Works":
+                c.start = u"March 1850"
+                c.end = u"May 1859"
+                c.abstract = u"Household Words was an English weekly magazine edited by Charles Dickens in the 1850s which took its name from the line from Shakespeare 'Familiar in his mouth as household words' - Henry V"
+                c.mag_url = u"http://en.wikipedia.org/wiki/Household_Words"
+            elif mag =="All the Year Round":
+                c.start = u"28 January 1859"
+                c.end = u"30 March 1895"
+                c.abstract = u"All the Year Round was a Victorian periodical, being a British weekly literary magazine founded and owned by Charles Dickens, published between 1859 and 1895 throughout the United Kingdom. Edited by Dickens, it was the direct successor to his previous publication Household Words, abandoned due to differences with his former publisher. It hosted the serialization of many prominent novels, including Dickens' own A Tale of Two Cities. After Dickens's death in 1870, it was owned and edited by his eldest son Charles Dickens, Jr." 
+                c.mag_url = u"http://en.wikipedia.org/wiki/All_the_Year_Round"
+            else:
+                redirect_to(controller='magazine', action='view')
+                
+            return render('letters/magazine.html')
+     
+    '''
+       Method to return a resource view of the publication
+       @param author publication name
+       @param correspondent data type - rdf, json or xml
+    '''
+    def resource(self, author=None, correspondent=None):
+         if author is None:
+             abort(404)
+         else:
+             title = str(urllib.unquote(author))
+             if correspondent == "rdf":
+                 response.headers['Content-Type'] = 'text/xml; charset=utf-8'
+                 #response.headers['Content-Type'] = 'application/rdf+xml; charset=utf-8'
+                 rdf = rdf_transform()
+                 return rdf.create_publication(title, "magazine")
+            

File openletters/controllers/place.py

+import logging, urllib
+
+from pylons import request, response, session, tmpl_context as c
+from pylons.controllers.util import abort, redirect_to
+
+from openletters.lib.base import BaseController, render
+
+from openletters.transform.transform_json import json_transform
+from openletters.transform.transform_xml import xml_transform
+from openletters.transform.transform_rdf import rdf_transform
+
+from openletters.transform.sparql_funcs import sparql_funcs
+
+log = logging.getLogger(__name__)
+
+'''
+   Controller to show the geographical place. 
+   Need a callback to get a lat/long
+   Shows html in view
+   Show rdf, json and xml in resource
+'''
+
+class PlaceController(BaseController):
+
+    def index(self):
+        # Return a rendered template
+        #return render('/place.mako')
+        # or, return a response
+        sparql = sparql_funcs()
+        locations = []
+        locations = list(sparql.find_places())
+        c.places = sorted(locations)
+
+        return render('letters/magazineindex.html')
+    
+    def view (self,author=None):
+        if author is None:
+            abort(404)
+        else:
+            place = urllib.unquote(author)
+            if place == "Gads Hill":
+                c.start = '51.2440'
+                c.end = '0.2728'
+                c.coordinates = '51.2440N 0.2728E'
+                c.author = place
+                c.abstract = "Gads Hill Place in Higham, Kent, sometimes spelt Gadshill Place and Gad's Hill Place, was the country home of Charles Dickens, the most successful British author of the Victorian era."
+                c.mag_url = "http://en.wikipedia.org/wiki/Gads_Hill_Place"
+            elif place == 'Tavistock House':
+                c.start = '51.5255'
+                c.end = '0.1286'
+                c.coordinates = '51.5255N  0.1286W'
+                c.author = place
+                c.abstract = "Tavistock House was the London home of the noted British author Charles Dickens and his family from 1851 to 1860. At Tavistock House Dickens wrote Bleak House, Hard Times, Little Dorrit and A Tale of Two Cities. He also put on amateur theatricals there which are described in John Forster's Life of Charles Dickens. Later, it was the home of William and Georgina Weldon, whose lodger was the French composer Charles Gounod, who composed part of his opera Polyeucte at the house."
+                c.mag_url = "http://en.wikipedia.org/wiki/Tavistock_House"
+            return render('letters/magazine.html')
+    
+    def resource (self, author=None, correspondent=None):
+        if author is None:
+            abort(404)
+        else:
+            place =  str(urllib.unquote(author))
+        if correspondent == "rdf":
+            response.headers['Content-Type'] = 'text/xml; charset=utf-8'
+            rdf = rdf_transform()
+            return rdf.create_place(place)
+        
+    def map (self, author=None):
+        response.headers['Content-Type'] = 'text/javascript'
+        lat = ''
+        long = ''
+        place = urllib.unquote(author)
+        if place == "Gad's Hill":
+            lat = '51.2440'
+            long = '0.2728'
+        elif place == 'Tavistock House':
+            lat = '51.5255N'
+            long = '0.1286W'
+            
+        print """/
+            var markers = new OpenLayers.Layer.Markers( "Markers" );
+            map.addLayer(markers);
+            
+            var size = new OpenLayers.Size(21,25);
+            var offset = new OpenLayers.Pixel(-(size.w/2), -size.h);
+            var icon = new OpenLayers.Icon('http://www.openlayers.org/dev/img/marker.png', size, offset);
+            markers.addMarker(new OpenLayers.Marker(new OpenLayers.LonLat(0,0),icon));
+        """

File openletters/controllers/publisher.py

+import logging, urllib
+
+from pylons import request, response, session, tmpl_context as c
+from pylons.controllers.util import abort, redirect_to
+
+from openletters.lib.base import BaseController, render
+
+log = logging.getLogger(__name__)
+
+class PublisherController(BaseController):
+
+    def index (self):
+        return render('letters/publisherindex.html')
+    '''
+       Method to return the publisher details in HTML
+    '''
+    
+    def view (self, author=None):
+        
+        if author is None:
+            return render('letters/publisherindex.html')
+        else:
+            mag = urllib.unquote(author)
+            if mag =="Chapman and Hall":
+                c.abstract = u"""
+                Chapman & Hall was a British publishing house in London, founded in the first half of the 19th century by Edward Chapman and William Hall. Upon Hall's death in 1847, Chapman's cousin Frederic Chapman became partner in the company, of which he became sole manager upon the retirement of Edward Chapman in 1864. In 1868 author Anthony Trollope bought a third of the company for his son, Henry Merivale Trollope. From 1902 to 1930 the company's managing director was Arthur Waugh. In the 1930s the company merged with Methuen, a merger which, in 1955 participated in forming the Associated Book Publishers. The latter was acquired by The Thomson Corporation in 1987.
+                Chapman & Hall was sold again in 1998 as part of Thomson Scientific and Professional to Wolters Kluwer, who sold on its well-regarded mathematics and statistics list to CRC Press. Today the name of Chapman & Hall/CRC is used as an imprint for science and technology books by Taylor and Francis, part of the Informa group since 2004.
+                Most notably, the company were publishers for Charles Dickens (from 1840 until 1844 and again from 1858 until 1870), and William Thackeray. They continued to publish hitherto unpublished Dickens material well into the 20th century.
+                """
+                c.mag_url = u"http://en.wikipedia.org/wiki/Chapman_and_Hall"
+            else:
+                redirect_to(controller='publisher', action='view')
+                
+            return render('letters/publisher.html') 

File openletters/docs/dickens_texts.xml

+<opencorrespondence>
+<book>
+        <id>1</id>
+        <title>The Pickwick Papers</title>
+        <mag_start>1836-04-01</mag_start>
+        <mag_end>1837-11-01</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>580</source>
+        <url>The_Pickwick_Papers</url>
+        <description></description>
+    </book>
+    <book>
+        <id>2</id>
+        <title>The Adventures of Oliver Twist </title>
+        <mag_start>1837-02-01</mag_start>
+        <mag_end>1839-04-01</mag_end>
+        <aka>Oliver Twist</aka>
+        <aka2></aka2>
+        <source>730</source>
+        <url>Oliver_Twist</url>
+        <description></description>
+    </book>
+    <book>
+        <id>3</id>
+        <title>The Life and Adventures of Nicholas Nickleby</title>
+        <mag_start>1838-04-01</mag_start>
+        <mag_end>1839-10-01</mag_end>
+        <aka>Nicholas Nickleby</aka>
+        <aka2></aka2>
+        <source>967</source>
+        <url>Nicholas_Nickleby</url>
+        <description></description>
+    </book>
+    <book>
+        <id>4</id>
+        <title>The Old Curiosity Shop</title>
+        <mag_start>1840-04-25</mag_start>
+        <mag_end>1841-02-06</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source></source>
+        <url>The_Old_Curiosity_Shop</url>
+        <description></description>
+    </book>
+    <book>
+        <id>5</id>
+        <title>Barnaby Rudge: A Tale of the Riots of 'Eighty </title>
+        <mag_start>1841-02-13</mag_start>
+        <mag_end>1841-11-27</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>917</source>
+        <url>Barnaby_Rudge</url>
+        <description></description>
+    </book>
+    <book>
+        <id>6</id>
+        <title>A Christmas Carol </title>
+        <mag_start>1843</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>46</source>
+        <url>A_Christmas_Carol</url>
+        <description></description>
+    </book>
+    <book>
+        <id>7</id>
+        <title>The Chimes </title>
+        <mag_start>1844</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>653</source>
+        <url>The_Chimes</url>
+        <description></description>
+    </book>
+    <book>
+        <id>8</id>
+        <title>The Cricket on the Hearth </title>
+        <mag_start>1845</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>678</source>
+        <url>The_Cricket_on_the_Hearth</url>
+        <description></description>
+    </book>
+    <book>
+        <id>9</id>
+        <title>The Battle of Life</title>
+        <mag_start>1846</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>676</source>
+        <url>The_Battle_of_Life</url>
+        <description></description>
+    </book>
+    <book>
+        <id>10</id>
+        <title>The Haunted Man and the Ghost's Bargain </title>
+        <mag_start>1848</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>644</source>
+        <url>The_Haunted_Man_and_the_Ghost's_Bargain</url>
+        <description></description>
+    </book>
+    <book>
+        <id>11</id>
+        <title>The Life and Adventures of Martin Chuzzlewit </title>
+        <mag_start>1843-01-01</mag_start>
+        <mag_end>1844-07-01</mag_end>
+        <aka>Martin Chuzzlewit</aka>
+        <aka2></aka2>
+        <source>968</source>
+        <url>Martin_Chuzzlewit</url>
+        <description></description>
+    </book>
+    <book>
+        <id>12</id>
+        <title>Dombey and Son</title>
+        <mag_start>1846-10-01</mag_start>
+        <mag_end>1848-04-01</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>821</source>
+        <url>Dombey_and_Son</url>
+        <description></description>
+    </book>
+    <book>
+        <id>13</id>
+        <title>David Copperfield </title>
+        <mag_start>1849-05-01</mag_start>
+        <mag_end>1850-11-01</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>766</source>
+        <url>David_Copperfield</url>
+        <description></description>
+    </book>
+    <book>
+        <id>14</id>
+        <title>Bleak House </title>
+        <mag_start>1852-03-01</mag_start>
+        <mag_end>1853-09-01</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>1023</source>
+        <url>Bleak_House</url>
+        <description></description>
+    </book>
+    <book>
+        <id>15</id>
+        <title>Hard Times: For These Times </title>
+        <mag_start>1854-04-01</mag_start>
+        <mag_end>1854-08-12</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>786</source>
+        <url>Hard_Times</url>
+        <description></description>
+    </book>
+    <book>
+        <id>16</id>
+        <title>Little Dorrit</title>
+        <mag_start>1855-12-01</mag_start>
+        <mag_end>1857-06-01</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>963</source>
+        <url>Little_Dorrit</url>
+        <description></description>
+    </book>
+    <book>
+        <id>17</id>
+        <title>A Tale of Two Cities</title>
+        <mag_start>1859-04-30</mag_start>
+        <mag_end>1859-11-26</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>98</source>
+        <url>A_Tale_of_Two_Cities</url>
+        <description></description>
+    </book>
+    <book>
+        <id>18</id>
+        <title>Great Expectations</title>
+        <mag_start>1860-12-01</mag_start>
+        <mag_end>1861-08-03</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>1400</source>
+        <url>Great_Expectations</url>
+        <description></description>
+    </book>
+    <book>
+        <id>19</id>
+        <title>Our Mutual Friend </title>
+        <mag_start>1864-05-01</mag_start>
+        <mag_end>1865-11-01</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>883</source>
+        <url>Our_Mutual_Friend</url>
+        <description></description>
+    </book>
+    <book>
+        <id>20</id>
+        <title>The Mystery of Edwin Drood </title>
+        <mag_start>1870-04-01</mag_start>
+        <mag_end>1870-09-01</mag_end>
+        <aka>Edwin Drood</aka>
+        <aka2></aka2>
+        <source>564</source>
+        <url>The_Mystery_of_Edwin_Drood</url>
+        <description></description>
+    </book>
+    <book>
+        <id>21</id>
+        <title>Sketches by Boz </title>
+        <mag_start>1836</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>882</source>
+        <url>Sketches_by_Boz</url>
+        <description></description>
+    </book>
+    <book>
+        <id>22</id>
+        <title>Sketches by Boz </title>
+        <mag_start>1837</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source></source>
+        <url></url>
+        <description></description>
+    </book>
+    <book>
+        <id>23</id>
+        <title>Reprinted Pieces </title>
+        <mag_start>1861</mag_start>
+        <mag_end></mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source></source>
+        <url></url>
+        <description></description>
+    </book>
+    <book>
+        <id>24</id>
+        <title>The Uncommercial Traveller</title>
+        <mag_start>1869</mag_start>
+        <mag_end>1869</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>914</source>
+        <url>The_Uncommercial_Traveller</url>
+        <description></description>
+    </book>
+    <book>
+        <id>25</id>
+        <title>Master Humphrey's Clock</title>
+        <mag_start>1840-04-04</mag_start>
+        <mag_end>1841-12-04</mag_end>
+        <aka></aka>
+        <aka2></aka2>
+        <source>588</source>
+        <url>Master_Humphrey's_Clock</url>
+        <description></description>
+    </book>
+</opencorrespondence>
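
A quick sanity check over the new data file, assuming the path used by cli.py; it mirrors what load_texts() reads and reuses the Gutenberg URL pattern from the book.html template:

    from xml.dom import minidom

    doc = minidom.parse('openletters/docs/dickens_texts.xml')
    books = doc.getElementsByTagName('book')
    print '%d books' % len(books)                   # 25 entries above

    for book in books:
        title = book.getElementsByTagName('title')[0].firstChild.data.strip()
        source = book.getElementsByTagName('source')[0].firstChild
        if source is not None:                      # skip empty <source></source> entries
            print title, '->', 'http://gutenberg.org/ebooks/' + source.data.strip()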

File openletters/main.py

-# -*- coding: latin-1 -*-
-'''
-Class to parse the Dickens letters and enter into a store
-'''
-import unicodedata
-from xml.dom import minidom
-import urllib, os
-
-from openletters.parse import parse_text, parse_date
-from openletters import model
-
-def getText(nodelist):
-    rc = []
-    for node in nodelist:
-        if node.nodeType == node.TEXT_NODE:
-            rc.append(unicodedata.normalize('NFKC', node.data))
-    return ''.join(rc)
-
-def handle_elements (elementname, element):
-    e = element.getElementsByTagName(elementname)
-    
-    for name in e:
-        return handle_parts(elementname, name)
-
-    
-def handle_parts (nodename, node):
-    return getText(node.childNodes)
-    
-
-def load_dickens_letters(fileobj, verbose=True):
-    #read = fileobj.read()
-    text = minidom.parse(fileobj)
-
-    #split the body into individual letters
-    letters  = text.getElementsByTagName('div')
- 
-    vol = 1
-    count = 1
-    for letter in letters:
-        modelletter = model.Letter(
-                    volume=handle_elements("volume", letter), 
-                    type=u'dickens',
-                    correspondent = handle_elements("correspondent", letter), 
-                    salutation=unicode(handle_elements("salutation", letter)),
-                    letter_text=unicode(handle_elements("letter", letter)),
-                    letter_date=unicode(handle_elements("date", letter))
-                    )
-        print "date", unicode(handle_elements("date", letter))
-        model.Session.add(modelletter)
-        model.Session.commit()
-    
-        if verbose:
-            print('Letter %s: \n\t ...' % (count))
-            model.Session.remove()
-        else:
-            print('Letter %s: SKIPPING' % (count))
-
-def load_source (fileobj, verbose=True):
-    
-    source_text = minidom.parse(fileobj)
-    
-    letters  = source_text.getElementsByTagName('source')
-    title = ''
-    for letter in letters:
-        modelsource = model.Source (
-               source_id=unicode(handle_elements("id", letter)),   
-               title=unicode(handle_elements("title", letter)), 
-               author=unicode(handle_elements("author", letter)),   
-               publn_data=unicode(handle_elements("publication", letter)),
-               publn_date=unicode(handle_elements("date", letter)), 
-               s_url=unicode(handle_elements("url", letter)),                 
-            )
-        
-        model.Session.add(modelsource)
-        model.Session.commit()
-    
-        if verbose:
-            print('Source %s: \n\t ...' % (title))
-            model.Session.remove()
-        else:
-            print('Source : SKIPPING')
-
-
-def index_letters(self, type, fileobj):
-    import xapian
-
-    db_path = 'db'
-    
-    #database = xapian.WritableDatabase(db_path, xapian.DB_CREATE_OR_OPEN)
-    #open a writable database on the xapian-tcpsrvr
-    database = xapian.remote_open_writable("localhost/correspondence", 33333)
-    indexer = xapian.TermGenerator()
-    indexer.set_stemmer(xapian.Stem('english'))
-    
-    xapian_file_name = 0
-    count = 0
-    text = minidom.parse(fileobj)
-    #split the body into individual letters
-    letters  = text.getElementsByTagName('div')
-    #open the XML, parse the letter id
-    for letter in letters:
-        count +=1
-        text=unicode(handle_elements("letter", letter))
-        corr=unicode(handle_elements("correspondent", letter))
-            
-        document = xapian.Document()
-        document.set_data(text)
-        #not sure this is going to work - rather than using the filename, use letter ids
-        letter_index = type + "/" + urllib.quote(corr) + "/" + str(count)
-
-        print "indexing %s" ,letter_index
-        document.add_value(xapian_file_name, letter_index)
-        
-        indexer.set_document(document)
-        indexer.index_text(text)
-        database.add_document(document)
-        
-    database.flush()

File openletters/model/__init__.py

 import meta
 from meta import Session
 from letter import letter_table, Letter, source_table, Source
+from books import books_table, Book
 
 def init_model(engine):
     """Call me before using any of the tables or classes in the model"""

File openletters/model/books.py

+from sqlalchemy import create_engine, Table, Column, Integer, UnicodeText, MetaData, ForeignKey
+from sqlalchemy import orm
+
+from meta import engine, metadata
+
+'''
+  Function to set up the books table
+  Contains title data 
+  two publication dates (the second one is for ending if the book was serialised)
+  two aka fields if the book is known by another name
+  description is a brief description of the book
+  url is a wikipedia / dbpedia url - might change in future
+'''
+
+books_table = Table('books', metadata,
+                     Column('book_id', Integer, primary_key=True),
+                     Column('book_title', UnicodeText), 
+                     Column('book_pub', UnicodeText),
+                     Column('book_end_pub', UnicodeText),
+                     Column('aka', UnicodeText),
+                     Column('aka2', UnicodeText),
+                     Column('description', UnicodeText),
+                     Column('url', UnicodeText),
+                     Column('source', UnicodeText),
+                     )
+
+class Book(object):
+    def __init__(self, **kwargs):
+        for k,v in kwargs.items():
+            setattr(self, k, v)
+
+orm.mapper(Book, books_table)
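
A minimal sketch of how the new Book mapping is used elsewhere in this commit: load_texts() builds Book objects from keyword arguments and BookController.view filters on the url column. It assumes init_model() has already been called with a configured engine; the field values come from the Bleak House entry in dickens_texts.xml:

    from openletters import model

    book = model.Book(book_title=u'Bleak House', book_pub=u'1852-03-01',
                      book_end_pub=u'1853-09-01', url=u'Bleak_House', source=u'1023')
    model.Session.add(book)
    model.Session.commit()

    # BookController.view does the equivalent of:
    for b in model.Session.query(model.Book).filter(model.Book.url == u'Bleak_House'):
        print b.book_title, b.book_pub, b.book_end_pub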

File openletters/model/dbase.py

 
 #define the table mappings here
 users = Table('letters', metadata, autoload=True)
-#notes = Table('annotation', metadata, autoload=True)
+books = Table('books', metadata, autoload=True)
 
 session = Session()
 
         ret_arr[row[4]] = [row[5]]
     
     return ret_arr
+
+def get_books():
+    titles_set = set()
+    titles = books.select()
+    rs = titles.execute()
+    for row in rs:
+        titles_set.add(str(row[1]).strip())
+        if str(row[1]).startswith("A "):
+            titles_set.add(str(row[1])[1:].strip())
+        if str(row[1]).startswith("The "):
+            titles_set.add(str(row[1])[3:].strip())
+        if ":" in str(row[1]):
+            for n in str(row[1]).split(":"):
+                titles_set.add(n.strip())
+        if row[4] is not None:
+            titles_set.add(str(row[4]).strip())
+    
+    return titles_set
+#gets the book details
+def get_book_rdf (title):
+    book_arr = {}
+    book = books.select(books.c.url == title)
+    rs = book.execute()
+    for row in rs:
+        book_arr[row[1]] = [row[2], row[3], row[4], row[7], row[8]]
+        #book_arr[row[1]] = [row[2]]
+        
+    return book_arr
 #gets any annotations for a letter - this will come later
 def get_annotation (url):
     annotation = notes.select(notes.c.url == url)
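
Going by the column order of books_table, get_book_rdf() returns a dict keyed on the book title with the remaining fields packed into a list; the argument is matched against the url column. A small sketch of consuming it, run with the application configured:

    from openletters.model import dbase

    # {title: [book_pub, book_end_pub, aka, url, source]}
    for title, fields in dbase.get_book_rdf('Bleak_House').items():
        pub_start, pub_end, aka, url, source = fields
        print title, pub_start, pub_end, url, source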

File openletters/parse/ReadMe.txt

-This folder will contain tools to parse the text file into the Redis store.
-
-17 April 2010 - IE to convert current PHP scripts into Python

File openletters/parse/parse_text.py

     ret_url = ret_url.replace(".", "")
     ret_url = ret_url.replace(": ", "")
     ret_url = ret_url.replace('"', "")
+    ret_url = ret_url.replace(',', "")
+    ret_url = ret_url.replace('\n', "")
     if type == "url":
         ret_url = ret_url.replace(" ", "")
         ret_url = ret_url.strip().lower()
                 
             if str(a[:1]).isupper():
                 if "!" not in a and len(str(a)) < 40:
-                    ret_quotes.append(a)  
+                    ret_quotes.append(camel_case(a))  
             else:
                 pass     
     else:
         
         if str(bq[:1]).isupper():
             if  "!" not in bq and len(str(bq)) < 40:
-                ret_quotes.append(bq)         
+                ret_quotes.append(camel_case(bq))         
         else:
             pass
         
     return ret_name 
 
 ''' 
-Method to return the full author name from db representation 
+   Method to return the full author name from db representation 
 '''
 def author_full (self, author):
         
     if "dickens" in author:
         full_author = "Charles Dickens" 
         
-    return full_author
+    return full_author
+
+'''
+   Method to return a geographical place from the header
+'''
+def find_geographical (text):
+    
+    place = re.findall(".*\._\s+", text)
+    match_place = place
+    
+    if match_place:
+        for m in match_place[0].split("_"):
+            place_str = m.strip()
+            if place_str[:2].isupper():
+                return unicode(camel_case(place_str[0: -1]), 'utf-8')
+            else:
+                return "No Place"
+    else:
+        return "No Place"
+'''
+   Capitalise the first letters and turn the string into camel case to normalise for URIs
+'''
+def camel_case (text_string):
+    return " ".join(t_str.capitalize() for t_str in text_string.split())
+    
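
The new parse_text helpers normalise place names pulled from letter headers into title case so they can be used in URIs. A short example; the sample header is made up but follows the "PLACE._ date" pattern that the regular expression in find_geographical() looks for:

    from openletters.parse import parse_text

    print parse_text.camel_case("ALL THE YEAR ROUND")
    # -> All The Year Round

    header = "GAD'S HILL PLACE, HIGHAM._ Tuesday, Sixth July, 1858."
    print parse_text.find_geographical(header)
    # -> Gad's Hill Place, Higham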

File openletters/templates/layout.html

   <link rel="stylesheet" href="http://m.okfn.org/kforge/css/master.css" type="text/css" media="screen, print" title="Master stylesheet" charset="utf-8" />
   <link rel="stylesheet" href="/css/extra.css" type="text/css" media="screen, print" title="Master stylesheet" charset="utf-8" />
 
-  <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.4/jquery.min.js"></script>
+  <script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.4/jquery.min.js"></script>
   <script type="text/javascript" src="http://m.okfn.org/kforge/scripts/kforge.js"></script>
 
+
   <py:if test="defined('optional_head')">
     ${optional_head()}
   </py:if>
 </head>
-<!-- remove if timeline doesn't work -->
-<body onload="onLoad();" onresize="onResize();" >
+<!-- remove if timeline doesn't work 
+<body onload="onLoad();" onresize="onResize();"  >
+-->
+<body onload="init()" onresize="onResize();"  >
 <div id="airlock">
   <!--[if IE]>
     <hr class="holder" />

File openletters/templates/letters/author.html

+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude" 
+ py:strip="True">
+
+ <py:def function="page_title">Letters - Home</py:def>
+
+<div py:def="content">
+ 
+ <h3>Charles Dickens</h3>
+
+ <p>Born: ${c.born}</p>
+ <p>Died: ${c.died}</p>
+ <p>Life: ${c.abstract}</p>
+ <a href="${c.author_url}">Charles Dickens page</a>
+ </div>
+
+ <xi:include href="../layout.html" />
+</html> 

File openletters/templates/letters/authorindex.html

+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude" 
+ py:strip="True">
+
+ <py:def function="page_title">Letters - Home</py:def>
+
+<div py:def="content">
+ <p>Current authors in the database.</p>
+ 
+ <h3>Author index </h3>
+ <ul>
+   <li >
+     <a href="${url(controller='author', action='view', author='Charles%20Dickens')}">Charles Dickens</a>
+  </li>
+ </ul>
+ </div>
+
+ <xi:include href="../layout.html" />
+</html> 

File openletters/templates/letters/book.html

+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude" 
+ py:strip="True">
+
+ <py:def function="page_title">Letters - Home</py:def>
+
+<div py:def="content">
+<div py:for="book in c.books">
+ <h3>${book.book_title}</h3>
+
+ <p>Published: ${book.book_pub}</p>
+ <p>Published: ${book.book_end_pub}</p>
+ <p>Abstract: ${book.description}</p>
+ <p><a href="http://en.wikipedia.org/wiki/${book.url}">${book.url}</a></p>
+ <p><a href="http://gutenberg.org/ebooks/${book.source}">Gutenberg source</a></p>
+ </div>
+</div>
+ <xi:include href="../layout.html" />
+</html> 

File openletters/templates/letters/magazine.html

+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude" 
+ py:strip="True">
+
+ <py:def function="page_title">Magazines</py:def>
+  <py:if test="c.coordinates">
+ <py:def function="optional_head">
+
+   <script type="text/javascript" src="http://www.openlayers.org/api/OpenLayers.js"></script>
+   <script type="text/javascript">
+   var map, layer;
+
+   function init(){
+       //OpenLayers.ProxyHost="/proxy/?url=";
+       map = new OpenLayers.Map('map');
+       layer = new OpenLayers.Layer.WMS( "OpenLayers WMS", 
+           "http://vmap0.tiles.osgeo.org/wms/vmap0", {layers: 'basic'} );
+       //need to get an API key for open correspondence
+       //layer = new OpenLayers.Layer.Google("Google Streets", // the default
+       //        {numZoomLevels: 20}
+        //   );
+          
+           
+       map.addLayer(layer);
+       //now to overlay with street data
+       map.setCenter(new OpenLayers.LonLat(${c.end}, ${c.start}), 6);
+
+       var markers = new OpenLayers.Layer.Markers( "Markers" );
+       map.addLayer(markers);
+
+       var size = new OpenLayers.Size(21,25);
+       var offset = new OpenLayers.Pixel(-(size.w/2), -size.h);
+       var icon = new OpenLayers.Icon('http://www.openlayers.org/dev/img/marker.png',size,offset);
+       markers.addMarker(new OpenLayers.Marker(new OpenLayers.LonLat(${c.end}, ${c.start}),icon));
+
+       //map.addControl(new OpenLayers.Control.LayerSwitcher());
+       //map.zoomToMaxExtent();
+
+   }
+
+   </script>
+ </py:def>
+  </py:if>
+ 
+<div py:def="content">
+ 
+ <h3>${c.author}</h3>
+
+ <p>Born: ${c.start}</p>
+ <p>Died: ${c.end}</p>
+ <p><py:if test="c.coordinates">${c.coordinates}</py:if></p>
+ <p>Life: ${c.abstract}</p>
+ 
+<py:if test="c.coordinates">
+    <div id="map" style="
+    width: 512px;
+    height: 256px;
+    border: 1px solid #ccc;
+    "></div>
+</py:if>
+
+ <a href="${c.mag_url}">${c.author}</a>
+ </div>
+
+ <xi:include href="../layout.html" />
+</html> 

File openletters/templates/letters/magazineindex.html

+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude" 
+ py:strip="True">
+
+ <py:def function="page_title">Letters - Home</py:def>
+
+<div py:def="content">
+
+ <py:if test="c.magazines">
+ <p>Current periodicals in the database.</p>
+ 
+ <h3>Index of Periodicals</h3>
+ <ul>
+  <div py:for="mags in c.magazines">
+   <li >
+     <a href="${url(controller='magazine', action='view', author=mags)}">${mags}</a>
+  </li>
+   </div>
+ </ul>
+</py:if>
+
+<py:if test="c.places">
+    <h3>Index of Locations</h3>
+ <ul>
+  <div py:for="places in c.places">
+   <li >
+     <a href="${url(controller='place', action='view', author=places)}">${places}</a>
+  </li>
+   </div>
+ </ul>
+</py:if>
+
+<py:if test="c.titles">
+    <h3>Title index </h3>
+ <ul>
+  <div py:for="title in c.titles">
+   <li >
+     <a href="${url(controller='book', action='view', author=title.url)}">${title.book_title}</a>
+  </li>
+   </div>
+ </ul>
+</py:if>
+
+ </div>
+
+ <xi:include href="../layout.html" />
+</html> 

File openletters/templates/letters/publisher.html

+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude" 
+ py:strip="True">
+
+ <py:def function="page_title">Letters - Home</py:def>
+
+<div py:def="content">
+ 
+ <h3>Chapman and Hall</h3>
+
+ <p>Abstract: ${c.abstract}</p>
+ <a href="${c.mag_url}">Chapman and Hall</a>
+ </div>
+
+ <xi:include href="../layout.html" />
+</html> 

File openletters/templates/letters/publisherindex.html

+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:xi="http://www.w3.org/2001/XInclude" 
+ py:strip="True">
+
+ <py:def function="page_title">Letters - Home</py:def>
+
+<div py:def="content">
+ <p>Current publishers held in the collection</p>
+ 
+ <h3>Publisher index </h3>
+ <ul>
+   <li >
+     <a href="${url(controller='publisher', action='view', author='Chapman and Hall')}">Chapman and Hall</a>
+  </li>
+ </ul>
+ </div>
+
+ <xi:include href="../layout.html" />
+</html> 

File openletters/templates/timeline/index.html

     <script src="http://static.simile.mit.edu/timeline/api-2.3.0/timeline-api.js" type="text/javascript"></script>
   <script type="text/javascript">
       var tl;
-      function onLoad() {
+      function init() {
   
       var eventSource = new Timeline.DefaultEventSource();
 

File openletters/tests/functional/test_author.py

+from openletters.tests import *
+
+class TestAuthorController(TestController):
+
+    def test_index(self):
+        #response = self.app.get(url(controller='author', action='index'))
+        # Test response...
+        pass

File openletters/tests/functional/test_book.py

+from openletters.tests import *
+
+class TestBookController(TestController):
+
+    def test_index(self):
+        #response = self.app.get(url(controller='book', action='index'))
+        # Test response...
+        pass

File openletters/tests/functional/test_correspondent.py

+from openletters.tests import *
+
+class TestCorrespondentController(TestController):
+
+    def test_index(self):
+        #response = self.app.get(url(controller='correspondent', action='index'))
+        # Test response...
+        pass

File openletters/tests/functional/test_magazine.py

+from openletters.tests import *
+
+class TestMagazineController(TestController):
+
+    def test_index(self):
+        #response = self.app.get(url(controller='magazine', action='index'))
+        # Test response...
+        pass

File openletters/tests/functional/test_place.py

+from openletters.tests import *
+
+class TestPlaceController(TestController):
+
+    def test_index(self):
+        #response = self.app.get(url(controller='place', action='index'))
+        # Test response...
+        pass

File openletters/tests/functional/test_publisher.py

+from openletters.tests import *
+
+class TestPublisherController(TestController):
+
+    def test_index(self):
+        #response = self.app.get(url(controller='publisher', action='index'))
+        # Test response...
+        pass

File openletters/transform/sparql_funcs.py

+import rdflib,urllib
+
+try:
+    from sets import Set
+except ImportError:
+    Set = set
+
+try:
+    from rdflib.Graph import ConjunctiveGraph as Graph
+except ImportError:
+    from rdflib.graph import ConjunctiveGraph as Graph
+
+        
+from rdflib.store import Store, NO_STORE, VALID_STORE
+from rdflib import Namespace, Literal, URIRef, RDF, RDFS, plugin
+
+
+''' 
+    Functions to parse the RDF endpoint and build indexes from the RDF data
+'''
+
+geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
+
+class sparql_funcs():
+    
+    def __init__(self):
+        self.g = Graph('IOMemory')
+        self.endpoint = "http://localhost:5000/data/endpoint/rdf"
+        #self.g.bind('geo', geo)
+
+    def find_places(self):
+        '''
+            Function to get the distinct locations mentioned in the headers of the letters. 
+            These are the locations from which Dickens wrote. 
+            TODO: Parsing the letters to get the places mentioned in them
+        '''
+        row = set()
+        self.g.parse(self.endpoint)
+        print "places", len(self.g)
+        for s,_,n in self.g.triples((None, geo['name'], None)):
+            loc_key = urllib.unquote(n.replace("http://www.opencorrespondence.org/place/resource/", ""))
+            row.add(self.__tidy_location(loc_key))
+       
+        return row
+    
+    def __tidy_location(self, location):
+        '''
+           Normalise place names that refer to the same location.
+           TODO: probably needs some language processing to make this scalable
+        '''
+        aliases = {
+            'Office Of "household Words,': "Household Words",
+            '"household Words" Office': "Household Words",
+            '"household Words"': "Household Words",
+            'H. W. Office': "Household Words",
+            '"household Words,': "Household Words",
+            '"all The Year Round" Office': "All The Year Round",
+            'Office Of "all The Year Round,': "All The Year Round",
+            "Gad's Hill Place": "Gads Hill",
+            "Gad's Hill": "Gads Hill",
+            "Gad's Hill Place, Higham": "Gads Hill",
+            "Tavistock House, Tavistock Square": "Tavistock House",
+            "London, Tavistock House": "Tavistock House",
+            "Tavistock House, London": "Tavistock House",
+            }
+        if location in aliases:
+            return aliases[location]
+        if "U.s." in location:
+            location = str(location).replace("U.s", "United States")
+        return str(location).replace(".", "")
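The find_places routine above is what the places index is built from: it parses the endpoint, collects the objects of the geo:name triples and normalises them. A minimal usage sketch, not part of this commit, assuming the Pylons app is running locally so the endpoint URL hard-coded in __init__ resolves:

    from openletters.transform.sparql_funcs import sparql_funcs

    # fetch the endpoint RDF and print the normalised place names
    # that would feed the places index
    funcs = sparql_funcs()
    for place in sorted(funcs.find_places()):
        print place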

File openletters/transform/transform_rdf.py

-from openletters.model import dbase
-from openletters.parse import parse_text
 import urllib, rdflib
 
 try:
 from rdflib.store import Store, NO_STORE, VALID_STORE
 from rdflib import Namespace, Literal, URIRef, RDF, RDFS, plugin
 
+from openletters.model import dbase
+from openletters.parse import parse_text
 
-
-letter_ns = Namespace('http://purl.org/letter/')
+letter_ns = Namespace('http://www.opencorrespondence.org/schema#')
+skos = Namespace('http://www.w3.org/2008/05/skos#')
 FOAF = Namespace('http://xmlns.com/foaf/0.1/')
 XSD_NS = Namespace(u'http://www.w3.org/2001/XMLSchema#')
 owl_time = Namespace('http://www.isi.edu/~pan/damltime/time-entry.owl#')
 dublin_core = Namespace('http://purl.org/dc/elements/1.1/')
+owl = Namespace('http://www.w3.org/2002/07/owl#')
+exam = Namespace('http://example.org/')
+geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
+
+
 base_uri = "http://www.opencorrespondence.org/"
 
 class rdf_transform:
         self.g.bind('foaf', FOAF)
         self.g.bind('time-entry', owl_time)
         self.g.bind('letter', letter_ns)
+        self.g.bind('owl', owl)
+        self.g.bind('ex', exam)
+        self.g.bind('geo', geo)
         self.g.bind('base', base_uri)
 
-    '''
-      creates an rdf representation of letter used to load into the triple store
-      '''
+
     def create_rdf_letter (self, letters):
- 
+        '''
+          Creates an RDF representation of each letter, used to load the triple store
+        '''
         for l in letters:
-            correspondence = base_uri + "letters/view/" + l.type + '/' + urllib.quote(l.correspondent) + '/' + str(l.id)
+            correspondence = base_uri + "letters/resource/" + l.type + '/' + urllib.quote(l.correspondent) + '/' + str(l.id)
             self.add_author(correspondence, "Charles Dickens")
-            
+            self.add_subject(correspondence, "letter")
             self.add_time(correspondence, str(l.letter_date)+'T00:00:00')
             self.add_correspondent(correspondence, l.correspondent)
-
+            #self.add_place(correspondence, parse_text.find_geographical(l.letter_text))
+            place = None
+            try:
+                place = parse_text.find_geographical(str(l.letter_text))
+            #unicode errors are text related
+            except UnicodeError:
+                pass
+            if place is not None:
+                self.add_place(correspondence, place)
+                
+            self.add_letter_text(correspondence, l.letter_text)
             self.add_salutation(correspondence, l.correspondent, l.salutation)
                 #this section will parse for proper names in due course
                 #commented out whilst code is being ported
             letter_quotes = parse_text.parse_balanced_quotes(l.letter_text)
             for quote in letter_quotes:
                  if str(quote[0:1]).isupper and "!" not in quote:
-                     self.add_text(correspondence, parse_text.stripPunc(quote))
+                     if quote in ("ALL THE YEAR ROUND", "HOUSEHOLD WORDS", "Household Words"):
+                         self.add_magazine(correspondence, parse_text.stripPunc(quote))
+                     else:
+                         self.add_text(correspondence, parse_text.stripPunc(quote))
                 
         letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
         return letter_rdf
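The add_place and add_magazine helpers called above are not shown in this hunk. For orientation only, a helper consistent with what find_places() in sparql_funcs.py reads back might look like the sketch below; this is an assumption, not the committed implementation:

    def add_place(self, correspondence, place):
        # hypothetical sketch: give the letter resource a geo:name pointing at
        # a place resource URI, which is what sparql_funcs.find_places() unpacks
        place_ref = base_uri + "place/resource/" + urllib.quote(place)
        self.g.add((URIRef(correspondence), geo['name'], URIRef(place_ref)))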
     
-    ''' function to create an endpoint in rdf/xml '''
+    
     def create_rdf_end (self):
-
+        ''' Builds the RDF/XML document served at the endpoint '''
         correspondence = base_uri 
         
         letter = {}  
     
         letter_items = letter.items()
         letter_items.sort()
-          
+    
+        works = dbase.get_books()
+        
         for url, text in letter_items:
-            correspondence = base_uri + "letters/view/dickens/" + urllib.quote(text[1]) + '/' + str(url)
-            self.add_author(correspondence, "Charles Dickens")
+            place = None
+            letter = ''
+            try:
+                correspondence = base_uri + "letters/resource/dickens/" + urllib.quote(text[1]) + '/' + str(url)
+                self.add_author(correspondence, "Charles Dickens")
+                self.add_subject(correspondence, "letter")
+                self.add_subject(correspondence, "Charles Dickens")
+                self.add_subject(correspondence, parse_text.camel_case(str(text[1])))
+                self.add_time(correspondence, str(text[3])+'T00:00:00')
+                self.add_correspondent(correspondence, urllib.quote(parse_text.camel_case(str(text[1]))))
+                self.add_salutation(correspondence, urllib.quote(str(text[1])), str(text[4]))
+                place = parse_text.find_geographical(str(text[2]))
+                letter = str(text[2])
+            #unicode errors are text related
+            except UnicodeError:
+                pass
+            if place is not None:
+                self.add_place(correspondence, place)
             
-            self.add_time(correspondence, str(text[3])+'T00:00:00')
-            self.add_correspondent(correspondence, urllib.quote(str(text[1])))
-            self.add_salutation(correspondence, urllib.quote(str(text[1])), str(text[4]))
+            self.add_letter_text(correspondence, letter)
+            
             #this section will parse for proper names in due course
             #commented out whilst code is being ported
             #letter_name = parse_text.parseProperNames(text)
-           # print"names, ", letter_name
-            
-            #for name in letter_name:
-            #    letter_rdf += "<letter:personReferred>%s</letter:personReferred>" %(name)
-           # works = Set(["Copperfield", "David Copperfield"])                
+           # print"names, ", letter_name 
+                           
             letter_quotes = parse_text.parse_balanced_quotes(text[2])
             for quote in letter_quotes:
-                #the length is to remove anything really long
-                #if str(quote[0:1]).isupper and "!" not in quote and len(str(quote)) < 40:
-                self.add_text(correspondence, parse_text.stripPunc(quote))
-                #if quote in works:
-                #    self.add_author_text(correspondence, parse_text.stripPunc(quote))
-                #else:
-                #    self.add_text(correspondence, parse_text.stripPunc(quote))
+                work = parse_text.stripPunc(quote)
+
+                #TODO: Normalise the text to reduce code repetition
+                periodicals = set(['All The Year Round', 'Household Words', 'The Daily News'])
+                #print "quote", parse_text.stripPunc(quote)
+                if quote in periodicals:
+                    self.add_magazine(correspondence, quote)
+                
+                if work in works:
+                    if work == "Copperfield":
+                        work = "David Copperfield"
+                    elif work == "Nickleby":
+                        work = "Nicholas Nickleby"
+                    elif work == "Edwin Drood":
+                        work = "The Mystery of Edwin Drood" 
+                    elif work == "Dombey":
+                        work = "Dombey and Son" 
+                    elif work == "Tale of Two Cities":
+                        work = "A Tale of Two Cities"
+                    elif work == "Christmas Carol":
+                        work = "A Christmas Carol"
+                        
+                    self.add_text(correspondence, work)
 
         letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
         return letter_rdf
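The work-title aliasing above is the repetition the TODO refers to; the same mapping could be shared by create_rdf_letter and create_rdf_end. A minimal, table-driven sketch (not part of this commit; the helper name is hypothetical):

    # canonical titles for the short forms Dickens uses in the letters
    WORK_ALIASES = {
        "Copperfield": "David Copperfield",
        "Nickleby": "Nicholas Nickleby",
        "Edwin Drood": "The Mystery of Edwin Drood",
        "Dombey": "Dombey and Son",
        "Tale of Two Cities": "A Tale of Two Cities",
        "Christmas Carol": "A Christmas Carol",
    }

    def normalise_work(work):
        # fall back to the quoted text when no alias is known
        return WORK_ALIASES.get(work, work)

With that in place, the branch above reduces to self.add_text(correspondence, normalise_work(work)).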
         
     def create_correspondent(self, corr, letter_items):
             u_corr = unicode(corr)
-            
-            correspondence = base_uri + "letters/correspondent/" + urllib.quote(corr)
-            
-            self.add_correspondent(correspondence, corr)
+
+            correspondence = base_uri + "correspondent/resource/" + urllib.quote(corr)
+            self.add_subject(correspondence, "correspondent")
+            #self.add_correspondent(correspondence, corr)
     
             for url, text in letter_items:
                 if url is not None or url != '':
                     self.add_salutation(correspondence, corr, str(url))
+            #need rules to define relationships - family, authors
+            if u_corr == "Miss Hogarth":
+                self.add_subject(correspondence, "daughter")
+                self.add_daughter(correspondence, "Charles Dickens")
+                self.add_sameas(correspondence, "http://dbpedia.org/page/Georgina_Hogarth")
+                
+            letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
+            
+            return letter_rdf
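The Miss Hogarth branch hard-codes a single relationship; the rules the comment asks for could live in a lookup keyed on the correspondent, which also keeps the facts easy to audit (DBpedia records Georgina Hogarth as Dickens's sister-in-law). A hypothetical sketch, not part of this commit:

    def __apply_correspondent_rules(self, correspondence, corr):
        # hypothetical: look the correspondent up instead of branching on names
        rules = {
            "Miss Hogarth": ("sister-in-law", "http://dbpedia.org/page/Georgina_Hogarth"),
        }
        if corr in rules:
            relationship, same_as = rules[corr]
            self.add_subject(correspondence, relationship)
            self.add_sameas(correspondence, same_as)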
+        
+    def create_publication(self, title, type):
+            books_set = {}
+            start = ''
+            end = ''
+            abstract = ''
+            uri_str = ''
+
+            if type == "magazine":
+                if title == "Household Words":
+                     start = u"1850-03-01"
+                     end = u"1859-05-01"
+                     abstract = u"Household Words was an English weekly magazine edited by Charles Dickens in the 1850s which took its name from the line from Shakespeare 'Familiar in his mouth as household words' - Henry V"
+                     uri_str = u"Household_Words"
+                elif title =="All the Year Round":
+                     start = u"1859-01-28"
+                     end = u"1895-03-30"
+                     abstract = u"All the Year Round was a Victorian periodical, being a British weekly literary magazine founded and owned by Charles Dickens, published between 1859 and 1895 throughout the United Kingdom. Edited by Dickens, it was the direct successor to his previous publication Household Words, abandoned due to differences with his former publisher. It hosted the serialization of many prominent novels, including Dickens' own A Tale of Two Cities. After Dickens's death in 1870, it was owned and edited by his eldest son Charles Dickens, Jr." 
+                     uri_str = u"All_the_Year_Round"
+            else:
+                books = dbase.get_book_rdf(title)
+                book_items = books.items()
+                book_items.sort()
+                
+                for u, book in book_items:
+
+                    title = u
+                    start = book[0]
+                    end = book[1]
+                    abstract = book[2]
+                    uri_str = book[3]
+                    source = book[4]
+                    #create a books dictionary as a list of records to build a list of uris from
+                    # title => uri string
+                    books_set[u] = uri_str
+                    
+                    if ":" in u:
+                        for bk in u.split(":"):
+                            books_set[bk.strip()] = uri_str
+            
+                    if "The " in u or "A " in u:
+                        aka = u.replace("The ", "").replace("A ", "")
+                        books_set[aka] = uri_str
+            
+            correspondence = base_uri + type + "/resource/" + title.strip().replace(" ", "_")
+            self.add_subject(correspondence, type)
+            self.add_subject(correspondence, "Charles Dickens")
+            self.add_author(correspondence, "Charles Dickens")    
+            self.add_time(correspondence, start)
+            self.add_time(correspondence, end)
+            self.add_title(correspondence, title)
+                
+            self.add_abstract(correspondence, abstract)
+            uri = u"http://dbpedia.org/page/" + uri_str
+            self.add_sameas(correspondence, uri)
+            
+            if type == "book":
+               source_uri = "http://gutenberg.org/ebooks/" + source
+               self.add_sameas(correspondence, source_uri)
             
             letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
             
             return letter_rdf
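create_publication covers both periodicals and books behind one entry point: magazine metadata is hard-coded above, book metadata comes from dbase.get_book_rdf(title). A usage sketch with hypothetical call sites, not part of this commit, assuming rdf_transform() takes no constructor arguments (its __init__ is not shown in this hunk):

    from openletters.transform.transform_rdf import rdf_transform

    transform = rdf_transform()
    # a periodical: dates, abstract and DBpedia slug are hard-coded
    magazine_rdf = transform.create_publication("Household Words", "magazine")
    # a book: metadata is looked up via dbase.get_book_rdf(title)
    book_rdf = transform.create_publication("David Copperfield", "book")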