# ordf/ordf/handler/httpfourstore.py
from ordf.handler import HandlerPlugin
from ordf.graph import Graph, _Graph, ConjunctiveGraph
from ordf.utils import get_identifier
from urlparse import urljoin
from urllib import quote, urlencode
from urllib2 import urlopen, Request
from cStringIO import StringIO
from rdflib.util import from_n3
from collections import namedtuple
from rdflib.query import Result
import logging

# Module-level logger, named after this module's import path.
LOG = logging.getLogger(__name__)

class HTTPFourStore(HandlerPlugin):
    """
    Handler plugin that stores and retrieves graphs over HTTP.

    Graph data is written to ``<url>/data/`` and SPARQL queries are posted
    to ``<url>/sparql/``.  Errors raised by ``urlopen`` (network or HTTP
    failures) are not caught here and propagate to the caller.
    """

    def __init__(self, *args, **kw):
        """
        Derive the data and SPARQL endpoint URLs from the ``url`` keyword.

        ``url`` is required; a ``KeyError`` is raised when it is missing.
        Positional arguments and any other keywords are ignored.
        """
        self.url = kw['url']
        # urljoin() replaces the last path segment unless the base ends
        # with a slash, so normalise the base URL first.
        if not self.url.endswith('/'):
            self.url = self.url + '/'
        self.dataurl = urljoin(self.url, 'data/')
        self.sparqlurl = urljoin(self.url, 'sparql/')

    def _send(self, request):
        """
        Perform *request* and close the response without reading its body.

        Used for the write operations, where only success or failure (an
        exception raised by ``urlopen``) matters.
        """
        # TODO: catch errors here
        response = urlopen(request)
        response.close()

    def __getitem__(self, key):
        """
        Fetch the graph identified by *key* from the SPARQL endpoint.

        A ``CONSTRUCT`` query restricted to that graph is posted and the
        response is parsed as turtle.  Returns a ``Graph`` whose identifier
        is ``get_identifier(key)``.
        """
        # Alternative that is not used here:
        #   GET http://host/prefix/data/graphname
        ident = get_identifier(key)
        query = 'query=' + quote("CONSTRUCT { ?s ?p ?o } FROM <%s> WHERE { ?s ?p ?o }"  % ident)
        headers = {
            'content-type': 'application/x-www-form-urlencoded',
            # 'application/x-turtle' did not seem to be supported as an
            # Accept value; xml and json could be supported later.
            'accept': 'text/plain',
        }
        # TODO: catch errors here
        response = urlopen(Request(self.sparqlurl, query, headers))
        try:
            graph = Graph(identifier=ident)
            graph.parse(response, format='turtle')
        finally:
            # Always release the connection, even when parsing fails.
            response.close()
        LOG.debug("loaded %d triples from %s", len(graph), graph)
        return graph

    def __setitem__(self, key, value):
        """
        PUT the turtle serialisation of *value* to the data endpoint.

        The target graph is named by ``get_identifier(key)`` and passed as
        the ``graph`` query parameter.  *value* must be a ``_Graph``.
        """
        assert isinstance(value, _Graph)
        LOG.debug("replace %d triples in %s", len(value), value)
        ident = get_identifier(key)
        params = {'graph': ident}  # apikey?
        data = StringIO()
        value.serialize(data, format='turtle')
        headers = {'content-type': 'application/x-turtle'}
        url = '%s?%s' % (self.dataurl, urlencode(params))
        r = Request(url, data.getvalue(), headers)
        # urllib2 only issues GET/POST by itself; override the verb.
        r.get_method = lambda: 'PUT'
        self._send(r)

    def __delitem__(self, key):
        """
        Send a DELETE request for the graph identified by *key*.

        The graph is named by ``get_identifier(key)`` and passed as the
        ``graph`` query parameter of the data endpoint.
        """
        ident = get_identifier(key)
        params = {'graph': ident}  # apikey?
        url = '%s?%s' % (self.dataurl, urlencode(params))
        r = Request(url)
        # urllib2 only issues GET/POST by itself; override the verb.
        r.get_method = lambda: 'DELETE'
        self._send(r)

    def append(self, frag):
        """
        POST the turtle serialisation of *frag* to the data endpoint.

        The target graph is ``get_identifier(frag)``; graph name, data and
        mime type are sent as a form-encoded body.  *frag* must be a
        ``_Graph``.
        """
        assert isinstance(frag, _Graph)
        # The original logged an undefined name here, which raised a
        # NameError on every call before anything was sent.
        LOG.debug("append %d triples to %s", len(frag), frag)
        ident = get_identifier(frag)
        data = StringIO()
        frag.serialize(data, format='turtle')
        params = {'graph': ident,
                  'data': data.getvalue(),
                  'mime-type': 'application/x-turtle',
                  #'apikey': None,
                  }
        headers = {'content-type': 'application/x-www-form-urlencoded'}
        # Supplying a request body makes urllib2 issue a POST.
        r = Request(self.dataurl, urlencode(params), headers)
        self._send(r)

    def query(self, query):
        """
        Post a SPARQL *query* to the SPARQL endpoint and parse the reply.

        The return value depends on the response content type:

        * ``application/sparql-results+xml``: ``Result.parse(..., 'xml')``
        * ``text/tab-separated-values``: ``Result.parse(..., 'tsv')``
        * anything else: a ``Graph`` parsed from the response as turtle
        """
        params = {'query': query,
                  #'soft-limit': <set to something not overwhelming>,
                  'output': 'text',  # future versions will support this
                  }
        # Notes on the /sparql/ endpoint (form-encoded POST):
        #    values: query, soft-limit, output (text, sparql, json),
        #            default-graph-uri, apikey, callback
        #    flags: restricted
        #    unsupported: default-graph-uri? (not passed on?)
        #
        # Relation between the output parameter and the Accept header:
        #    text   -> text/turtle, text/tab-separated-values, text/plain ?
        #    csv    -> text/csv
        #    json   -> application/json, application/sparql-results+json
        #    sparql -> application/rdf+xml, application/sparql-results+xml
        #
        # An /update/ endpoint (form-encoded POST: update, apikey) also
        # exists but is not used here.

        # FIXME: hack Accept header together until 4store is upgraded to
        #        support the output parameter.  Support update queries here?
        # NOTE: this is a plain substring test, so any query that merely
        #       contains the text "select" is treated as a SELECT query.
        headers = {}
        if 'select' in query.lower():
            headers['accept'] = 'text/tab-separated-values'
        else:
            headers['accept'] = 'text/turtle'
        r = Request(self.sparqlurl, urlencode(params), headers)
        LOG.debug('start query')
        response = urlopen(r)
        LOG.debug('finished query')

        try:
            # TODO: check what response.info is good for?
            content_type = response.headers.gettype()
            if content_type in ('application/sparql-results+xml',):
                result = Result.parse(response, 'xml')
            elif content_type in ('text/tab-separated-values',):
                # The first line of the body holds the variable names.
                result = Result.parse(response, 'tsv')
            else:
                # CONSTRUCT or DESCRIBE: the reply is a graph.
                result = Graph()
                result.parse(response, format='turtle')
            # Taking the length inside the try block ensures the result
            # has been sized before the connection is released.
            LOG.debug('return parsed result of len %d', len(result))
        finally:
            response.close()
        return result

    def commit(self):
        """
        Do nothing; this handler has no commit step.
        """
        pass

    def rollback(self):
        """
        Do nothing; this handler has no rollback step.
        """
        pass