Anonymous avatar Anonymous committed ba4bd74

edit operations

Comments (0)

Files changed (2)

     For operations that involve writing data to the store, the graph identifier 
     that should be the destination of the data. Default is the filename.
 
+.. cmdoption:: -e sub, --edit sub
+
+    When importing (as opposed to copying) a graph from a remote store, pass
+    a sed(1)-style substitution command to rewrite its URIs. The option may
+    be given more than once. For example, for development one might do:
+
+    .. code-block:: sh
+
+        ordf -c development.ini -s --nocs \
+            -e 's@^http://.*bibliographica.org/@http://localhost:5000/@' \
+            http://bnb.bibliographica.org/entry/GB6214539
+
+    These substitutions can also be set in the configuration file, like
+    so::
+
+        ordf.edit = ['s@^http://.*bibliographica.org/@http://localhost:5000/@']
+
+    Unless the `--noprov` flag is given, these edit operations are recorded
+    in the provenance block of the imported graph.
+    
 .. cmdoption:: -u username, --user username
 
     Username for logging changes
 import urllib
 import logging
 import sys
+import re
 
 def config(filename):
     cfgpath = os.path.abspath(filename)
     parser.add_option("--noprov", action="store_true",
                       dest="noprov", default=False,
                       help="Do not store provenance information")
+    parser.add_option("-e", "--edit", action="append", dest="edit",
+                      default=[], help="Transform URIs according to the given sed-like regexp")
     def parse_config(self):
         super(ORDF, self).parse_config()
 
                     print "ERROR: missing log message"
                     exit(1)
 
+        regexps = self.config.get("ordf.edit")
+        if regexps is not None:
+            self.config["ordf.edit"] = eval(regexps)
+        else:
+            self.config["ordf.edit"] = []
+        if self.options.edit != []:
+            self.config["ordf.edit"] = self.options.edit
+            
         if self.options.reindex:
             reader = self.config.get("ordf.readers")
             if reader.find(",") >= 0:
         self.log = logging.getLogger("ordf.command/%s" % os.getpid())
 
     def command(self):
+        self.compile_edit()
+
         if self.options.reload:
             if os.environ.get(self._reloader_environ_key):
                 self.log.debug("using reloading file monitor")
                         ident = URIRef(filename)
                     else:
                         ident = URIRef("file://" + os.path.abspath(filename))
-
+                    ident = self.edit(ident)
+                
                 g = Graph(identifier=ident)
-
+                
                 if self.options.save:
-                    g.parse(filename, format=self.options.format)
-
+                    ingraph = Graph()
+                    ingraph.parse(filename, format=self.options.format)
+                    for s, p, o in ingraph.triples((None, None, None)):
+                        if isinstance(s, URIRef):
+                            s = self.edit(s)
+                        if isinstance(o, URIRef):
+                            o = self.edit(o)
+                        g.add((s, p, o))
+                    
                     if not self.options.noprov:
                         proc = Process()
                         proc.agent(agent)
                         else:
                             src = URIRef("file://" + os.path.abspath(filename))
                         proc.use(src)
+                        if self.config["ordf.edit"] != []:
+                            proc.add((proc.identifier, RDFS["comment"],
+                                      Literal("\n".join(self.config["ordf.edit"]))))
                         proc.result(g)
 
                     self.log.info("add %s" % (g.identifier,))
             _daemon()
         self.handler.close()
 
+    def compile_edit(self):  # parse config["ordf.edit"] entries of the form s<sep>pattern<sep>replacement<sep> into self._edit
+        self._edit = []  # list of (compiled regex, replacement string) pairs, iterated by edit()
+        def err(reason):  # logs the offending substitution and exits with status 255; reads `subst` from the loop below
+                self.log.error("invalid substitution: %s (%s)", subst, reason)
+                self.log.error("must be of the form s@pattern@replacement@")
+                sys.exit(255)
+
+        for subst in self.config["ordf.edit"]:
+            if not subst.startswith("s"): err("does not start with s")
+            if len(subst) < 4: err("too short")
+            sep = subst[1]  # the character right after the leading "s" is the separator
+            if sep == '\\': err("escape character is not a valid separator")
+            if subst[-1] != sep: err("wrong end character")
+            pat_start = 2
+            pat_end = -1  # stays negative if no closing separator is found
+            for i in range(2, len(subst)-1):  # find the first separator not preceded by a backslash
+                if subst[i] == sep and subst[i-1] != '\\':
+                    pat_end = i
+                    break
+            if pat_end < 0: err("could not find end of pattern")
+            pattern = subst[pat_start:pat_end]
+            repl_start = pat_end+1  # NOTE(review): assigned but not read anywhere in this method
+            replace = subst[pat_end+1:-1]  # text between the pattern's closing separator and the final character; NOTE(review): not scanned for further unescaped separators
+            if len(replace) > 0 and replace[-1] == '\\': err("last character is escaped")
+            if len(pattern) == 0: err("no pattern")
+
+            try:
+                compiled = re.compile(pattern)
+                self._edit.append((compiled, replace))
+            except Exception, e:  # Python 2 except syntax
+                err("invalid regular expression: %s" % e)
+
+    def edit(self, uri):  # apply every (pattern, replacement) pair in self._edit to uri and return the result wrapped in URIRef
+        for pattern, replace in self._edit:  # pairs are applied in list order
+            uri = pattern.sub(replace, uri)  # each substitution operates on the output of the previous one
+        return URIRef(uri)
+    
     def restart_with_reloader(self):
         while 1:
             args = [sys.executable] + sys.argv
 from setuptools import setup, find_packages
 import sys, os
 
-version = '0.34'
+version = '0.35'
 
 try:
     from mercurial import ui, hg, error
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.