Commits

medoc  committed 7f655d9

have the zip filter access the config if possible and use the zipSkippedNames variable

  • Participants
  • Parent commits 8ed11f9

Comments (0)

Files changed (1)

File src/filters/rclzip

 
 # Zip file filter for Recoll
 
+import os
+import fnmatch
 import rclexecm
 from zipfile import ZipFile
 
+try:
+    from recoll import rclconfig
+    hasrclconfig = True
+except:
+    hasrclconfig = False
+# As a temporary measure, we also look for rclconfig as a bare
+# module, so that intermediate releases of the filter can ship
+# rclconfig.py alongside the filter code and use it.
+if not hasrclconfig:
+    try:
+        import rclconfig
+        hasrclconfig = True
+    except:
+        pass
+
 # Note about file names (Python 2.6, 2.7; don't know about 3.x)
 #
 # There is a bit in zip entries to indicate if the filename is encoded
 
     ###### File type handler api, used by rclexecm ---------->
     def openfile(self, params):
+        filename = params["filename:"]
         self.currentindex = -1
+        self.skiplist = []
+
+        if hasrclconfig:
+            config = rclconfig.RclConfig()
+            config.setKeyDir(os.path.dirname(filename))
+            skipped = config.getConfParam("zipSkippedNames")
+            if skipped is not None:
+                self.skiplist = skipped.split(" ")
+
         try:
-            self.zip = ZipFile(params["filename:"])
+            self.zip = ZipFile(filename)
             return True
         except:
             return False
             #self.em.rclog("getnext: EOF hit")
             return (False, "", "", rclexecm.RclExecM.eofnow)
         else:
-            ret= self.extractone(self.zip.namelist()[self.currentindex])
+            entryname = self.zip.namelist()[self.currentindex]
+
+            if hasrclconfig and len(self.skiplist) != 0:
+                while self.currentindex < len(self.zip.namelist()):
+                    entryname = self.zip.namelist()[self.currentindex]
+                    for pat in self.skiplist:
+                        if fnmatch.fnmatch(entryname, pat):
+                            entryname = None
+                            break
+                    if entryname is not None:
+                        break
+                    self.currentindex += 1
+                if entryname is None:
+                    return (False, "", "", rclexecm.RclExecM.eofnow)
+                
+            ret= self.extractone(entryname)
             self.currentindex += 1
             return ret