Commits

Tim Sherratt committed b28e509

Minor changes to work with Python 2.5

  • Participants
  • Parent commits 29031f8

Comments (0)

Files changed (2)

File src/trovenewspapers/harvest.py

 along with the TroveNewspapers package. If not, see <http://www.gnu.org/licenses/>.
 '''
 
+from __future__ import with_statement #for Python 2.5
 import csv
 import os
 import sys
                                                    'newspaper_id', 'issue_date', 'page', 
                                                    'page_url','corrections','ftext'], 
                                                    dialect=csv.excel)
+        # the path.exists check is necessary for Python 2.5
         if text: 
-            self.text_zip_file = ZipFile('%s_text.zip' % self.path, 'a')
+            if os.path.exists('%s_text.zip' % self.path):
+                self.text_zip_file = ZipFile('%s_text.zip' % self.path, 'a')
+            else:
+                self.text_zip_file = ZipFile('%s_text.zip' % self.path, 'w')
         if pdf:
-            self.pdf_zip_file = ZipFile('%s_pdf.zip' % self.path, 'a')
+            if os.path.exists('%s_pdf.zip' % self.path):
+                self.pdf_zip_file = ZipFile('%s_pdf.zip' % self.path, 'a')
+            else:
+                self.pdf_zip_file = ZipFile('%s_pdf.zip' % self.path, 'w')
 
     def harvest(self, query, filename=None, start=0, text=None, pdf=None):
         '''
                                                   string.replace(news.results['issue_date'], ' ', '-'), 
                                                   news.results['page'])
                     if self.text_zip_file:
-                        self.text_zip_file.writestr('%s/%s.txt' % 
-                                                    (directory, filename), 
+                        self.text_zip_file.writestr(('%s/%s.txt' % 
+                                                     #encode added to filename because of problem with Python 2.5
+                                                    (directory, filename)).encode('utf-8'), 
                                                     news.results['text'])
                     if self.pdf_zip_file:
                         pdf_url = 'http://trove.nla.gov.au/ndp/del/printArticlePdf/%s/3?print=n' % news.results['id']
                         except Exception, error:
                             self.harvest_failure(error)
                         else:
-                            self.pdf_zip_file.writestr('%s/%s.pdf' % 
-                                                       (directory, filename), 
+                            #encode added to filename because of problem with Python 2.5
+                            self.pdf_zip_file.writestr(('%s/%s.pdf' % 
+                                                       (directory, filename)).encode('utf-8'), 
                                                        content.read())
                     self.completed += 1
                     time.sleep(1)

File src/trovenewspapers/utilities.py

 '''
 from __future__ import with_statement
 import re
-import json
+try:
+    import json
+except ImportError:
+    import simplejson as json 
 import pickle
 from urllib2 import Request, urlopen, URLError, HTTPError
 import time