Commits

Tim Sherratt committed d4667f7

Added option to organise zip directory structure by year.

  • Participants
  • Parent commits b28e509

Comments (0)

Files changed (1)

src/trovenewspapers/bin/do_harvest.py

 
 def main(argv):
     # Look for user-defined values in a config file
-    config = ConfigParser.SafeConfigParser({'query': '', 'filename': '', 'start': 0, 'include-text': 'no', 'include-pdf': 'no'})
+    config = ConfigParser.SafeConfigParser({'query': '', 'filename': '',
+                                            'start': 0, 
+                                            'include-text': 'no', 
+                                            'zip-directory-structure': 'title',
+                                            'include-pdf': 'no'})
     config.read('harvest.ini')
     query = config.get('harvest', 'query')
     filename = config.get('harvest', 'filename')
     start = config.getint('harvest', 'start')
     text = config.getboolean('harvest', 'include-text')
+    zip_dir = config.get('harvest', 'zip-directory-structure')
     pdf = config.getboolean('harvest', 'include-pdf')
     # Look to see if there were any config values in the command line
     try:
-        opts, args = getopt.getopt(argv, "q:f:s:tp", 
-                                   ["query=", "filename=", "start=", "text", "pdf"])
+        opts, args = getopt.getopt(argv, "q:f:s:d:tp", 
+                                   ["query=", "filename=", "start=", "zipdir=", "text", "pdf"])
     except getopt.GetoptError:                                
         sys.exit(2)
     for opt, arg in opts:
 [...]
             start = arg      
         if opt in ('-t', '--text'):
             text = True
+        if opt in ('-d', '--zipdir'):
+            zip_dir = arg
         if opt in ('-p', '--pdf'):
             pdf = True
     if not query:
         print 'A Trove Newspapers search url is required.'
         sys.exit(2)
     harvester = harvest.TroveNewspapersHarvester()
-    harvester.harvest(query, filename, start, text, pdf)
+    harvester.harvest(query, filename, start, text, pdf, zip_dir)
     
 if __name__ == "__main__":
     main(sys.argv[1:])