dcat-tools / dcat / core / normalize.py

from datautil.normalization.table_based import Formats, Licenses
from rdflib.term import URIRef, Literal, BNode, Node

from vocab import * 
from processing import GraphProcessor

class FormatsGraphProcessor(GraphProcessor):
    """Normalize the objects of ``dc:format`` statements in a graph.

    Each format text found on a ``dc:format`` object is passed to a
    ``Formats`` normalizer.  When the result carries a ``mimetype``, the
    object is rewritten as a node with an ``rdf:value`` (the mimetype) and,
    when available, an ``rdfs:label`` and an ``rdfs:comment``.
    """

    @property
    def normalizer(self):
        """Return the ``Formats`` normalizer, creating it on first access.

        The instance is built from the ``google_user`` and
        ``google_password`` entries of ``self.config`` and cached on
        ``self._normalizer``.

        Raises:
            AssertionError: if either configuration key is missing.
        """
        if not hasattr(self, '_normalizer'):
            for key in ('google_user', 'google_password'):
                if key not in self.config:
                    # Raised explicitly instead of using ``assert`` so the
                    # check is not stripped under ``python -O``; the
                    # exception type is kept for existing callers.
                    raise AssertionError('missing config key: %s' % key)
            self._normalizer = Formats(self.config.get('google_user'),
                                       self.config.get('google_password'))
        return self._normalizer

    def _find_format_literal(self, graph, o):
        """Yield the candidate format texts for the node ``o``.

        If ``o`` is itself a ``Literal`` its text is the only candidate.
        Otherwise every ``Literal`` attached to ``o`` through ``rdf:value``
        and then ``rdfs:label`` is yielded, in that order.

        This is a lazy generator over ``graph``: callers that modify the
        graph while consuming it should materialize the results first.
        """
        if isinstance(o, Literal):
            yield unicode(o)
        else:
            for p in [RDF.value, RDFS.label]:
                for (_, _, v) in graph.triples((o, p, None)):
                    if isinstance(v, Literal):
                        yield unicode(v)

    def process(self, graph):
        """Rewrite every ``dc:format`` statement of ``graph`` in place.

        For each candidate text whose normalization has a ``mimetype``:

        * a literal object is replaced by a fresh blank node typed
          ``DC.IMT``;
        * ``rdf:value`` is replaced by the mimetype;
        * ``rdfs:label`` is replaced by ``name`` when present;
        * ``rdfs:comment`` is replaced by ``description`` or, failing
          that, ``fullname`` when present.

        If several candidate texts normalize successfully, each one is
        applied in turn, so the last one determines the final values.
        """
        # Snapshot the matches: the loop body adds and removes triples, and
        # mutating a graph while iterating over it is not safe.
        for (s, p, o) in list(graph.triples((None, DC['format'], None))):
            # Snapshot the texts as well: the generator reads the very
            # rdf:value / rdfs:label triples that are rewritten below.
            for text in list(self._find_format_literal(graph, o)):
                out = self.normalizer.get(text, source_hint=unicode(s))
                mimetype = out.get('mimetype')
                if mimetype is None:
                    continue

                if isinstance(o, Literal):
                    # A bare literal cannot carry properties; swap it for
                    # a blank node that can.
                    graph.remove((s, p, o))
                    o = BNode()
                    graph.add((s, p, o))
                    graph.add((o, RDF.type, DC.IMT))

                # Removing with a wildcard object clears any old value.
                graph.remove((o, RDF.value, None))
                graph.add((o, RDF.value, Literal(mimetype)))

                name = out.get('name')
                if name is not None:
                    graph.remove((o, RDFS.label, None))
                    graph.add((o, RDFS.label, Literal(name)))

                # Prefer the description; fall back to the full name.
                comment = out.get('description')
                if comment is None:
                    comment = out.get('fullname')
                if comment is not None:
                    graph.remove((o, RDFS.comment, None))
                    graph.add((o, RDFS.comment, Literal(comment)))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.