Commits

hideki nara committed 1cfe6d0

The download, load, and update commands are implemented (not tested yet)

  • Participants
  • Parent commits 3c5efc2

Comments (0)

Files changed (4)

 src/djpostal.egg-info/
 *.pyc
-*.sqlite3
+*.sqlite3*
+*.csv*
+*.lzh
+*.swp

File requirements.txt

 Sphinx
 pykf
+jcconv

File src/djpostal/management/commands/jpaddress.py

 # -*- coding: utf-8 -*-
 
 from django.core.management.base import BaseCommand, CommandError
-from django.contib.auth.models import User
+from django.contrib.auth.models import User
 from optparse import make_option
 from datetime import datetime
+from jcconv import *
 import commands
 import os
 import csv
+import sys
 #
-from models import JpAddress
+from djpostal.utils import *
+from djpostal.models import JpAddress
 
 class Command(BaseCommand):
     args = ''
             dest='file',
             default='ken_all.csv',
             help='JP Address Data File Name'),
+
+        make_option('--start',
+            action='store',
+            dest='start',
+            default=0,
+            help='start record'),
+
+        make_option('--end',
+            action='store',
+            dest='end',
+            default=sys.maxint,
+            help='end record'),
+
+        make_option('--charset',
+            action='store',
+            dest='charset',
+            default='sjis',
+            help='end record'),
+
+        make_option('--force','-f',
+            action='store_true',
+            dest='force',
+            help='force to do'),
         )
 
+    def download(self,*args,**options):
+        # Fetch and unpack the address data: the shell pipeline streams
+        # options['url'] through curl into `lha x -`.
+        # NOTE(review): option values are interpolated into a shell command
+        # line unescaped -- confirm they never come from untrusted input.
+        cmd= "curl %(url)s | lha x - " %  options 
+        if options['charset'] == 'utf8':
+            # Also queue an nkf pass that writes a converted copy to <file>.utf8.
+            cmd=cmd +  ";nkf -w %(file)s > %(file)s.utf8" % options 
+            # NOTE(review): `options` is this call's own **kwargs dict, so the
+            # renamed file is not visible to the caller after this returns.
+            options['file'] = options['file'] + ".utf8"
+        print "download",cmd
+        # Run the whole command line through the shell and echo its output.
+        print commands.getoutput( cmd )
+
+    def handle_download(self, *args, **options):
+        # "download" subcommand: keep an already existing data file unless the
+        # force option says otherwise, else run download().
+        # NOTE(review): --force declares no default in the visible option
+        # list; if the parser leaves it as None when the flag is absent, the
+        # `== False` test below never matches and the file is always
+        # re-downloaded -- confirm.
+        if os.path.exists( options['file'] ) and options['force'] ==False :
+                print "using existing",options['file']
+                return  
+
+        self.download(*args,**options)
+
+    def handle_load(self,*args, **options):
+        # "load" subcommand: runs the same pass as "update", with the 'load'
+        # flag set so that handle_update takes its get_or_create branch.
+        options['load']=True
+        self.handle_update(*args,**options)  
+
+    def handle_update(self, *args, **options):
+        # Read the address CSV named by options['file'] and store its rows as
+        # JpAddress records, then print a summary of what was done.
+        # Rows with an index outside [options['start'], options['end']] are
+        # not processed.
+        print args,options
+        if os.path.exists( options['file'] ) == False :
+            # NOTE(review): --force declares no default in the visible option
+            # list; if it arrives as None this `== False` test does not match
+            # and the download below runs even without the flag -- confirm.
+            if options['force'] ==False :
+                print options['file'], "doesn't exist. Download it first."
+                return
+            #: force to download
+            # NOTE(review): download() may write a converted <file>.utf8 copy,
+            # but options['file'] here still names the original file.
+            self.download(*args,**options)             
+
+        # Option values may arrive as strings; normalise the bounds to int.
+        options['start'] = int(options['start'])
+        options['end'] = int(options['end'])
+
+        # Counters as reported by the summary line: ci = inserts, cu = updates,
+        # cp = ignored rows; dts = start time.  ce is never used below.
+        ci,cu,ce,cp,dts= 0,0,0,0,datetime.now()
+        
+        # Column names are the model's field names minus the first one
+        # (presumably the auto-generated primary key -- TODO confirm).
+        # NOTE(review): the file object opened here is never closed explicitly.
+        for index,data_dict in CsvUnicodeReader.csv_enumerator(
+                        open( options['file'] ),
+                        fieldnames=  [ i.name for i in JpAddress._meta.fields ][1:],
+                        encoding=options['charset'],
+                        force_strip=True,
+                        ):
+
+            if index < options['start']:
+                continue
+            if index > options['end']:
+                break
+            
+            # Pass every field whose name contains 'kana' through half2hira.
+            for k,v in data_dict.items():
+                if k.find('kana') >=0 :
+                    data_dict[k] = half2hira(v) 
+            try:
+                if options.get('load',False) == False:
+                    # Without the 'load' flag every row is saved as a new record.
+                    JpAddress(**data_dict).save()                    
+                    ci = ci + 1
+                else:
+                    # NOTE(review): 'changed' is compared with the integer 0;
+                    # if the reader yields text values this is always true --
+                    # confirm against csv_enumerator.
+                    if data_dict['changed'] != 0:
+                        zip = data_dict.pop('zip')
+                        # NOTE(review): get_or_create leaves an existing row
+                        # untouched, so "Updates" appears to count rows that
+                        # were already present -- confirm the intent.
+                        obj,created = JpAddress.objects.get_or_create(
+                                          zip = zip,
+                                          defaults = data_dict,
+                                      )
+                        ci,cu = (ci+1,cu) if created else (ci,cu+1)
+                    else:
+                        cp=cp+1 
+            except Exception,e:
+                # Best effort: report the failing row and carry on with the next.
+                print "Error on ", index , e.message
+
+        d = datetime.now() - dts
+        # NOTE(review): microseconds are printed without zero padding, so the
+        # "seconds.microseconds" figure is not a true decimal fraction.
+        print "Total=%d Insearts=%d Updates=%d Ignores=%d Elapsed=%d.%d"  % (
+                    ci+cu,ci,cu,cp,d.seconds,d.microseconds)
+
     def handle(self, *args, **options):
-        if os.path.exists( options['file'] ) == False:
-            print commands.getoutput( "curl %(url)s | lha x - " %  options )
-        if os.path.exists( options['file']+".utf8" ) == False:
-            print commands.getoutput( "nkf -w %(file)s > %(file)s.utf8" % options ) 
+        # Dispatch on the first positional argument: "<name>" is routed to
+        # self.handle_<name>; an unknown name or no argument at all falls back
+        # to handle_help.
+        if len(args) > 0 :
+            getattr(self, 'handle_%s'% args[0],self.handle_help)(*args,**options)
+        else:
+            self.handle_help(*args,**options )  
 
+    def handle_help(self,*args,**options):
+        # Fallback used by handle(); for now it only dumps the arguments and
+        # options it received (no usage text is printed).
+        print args,options

File src/djpostal/utils.py

     return ret
 
 class CsvUnicodeReader(object):
-    def __init__(self, stream, dialect=None, encoding=None, errors="strict", **kwds):
+    def __init__(self, stream, dialect=None, encoding=None, 
+            errors="strict", force_strip = True,**kwds):
+        # Wrap `stream` in a csv.reader (csv.excel dialect unless one is
+        # given; extra keyword arguments go to csv.reader).
+        # `encoding` falls back to detect_encoding(stream) when not supplied.
+        # `errors` and `force_strip` are stored for next().
         self.reader = csv.reader(stream, dialect=dialect or csv.excel, **kwds)
         self.encoding = encoding  if encoding else detect_encoding(stream)
         self.line_num = 0 # Needed for DictReader
         self.errors = errors
+        self.force_strip = force_strip
 
     def __iter__(self):
         # The reader is its own iterator; rows are produced by next().
         return self
 
     def next(self):
+        # Decode every cell of the next csv row with force_unicode, using the
+        # configured encoding and error policy; cells are also stripped when
+        # force_strip is set.  Nothing is caught here, so whatever
+        # self.reader.next() raises at end of input propagates to the caller.
         return map(lambda s: 
+            force_unicode(s, encoding=self.encoding, errors=self.errors).strip() \
+            if self.force_strip else \
             force_unicode(s, encoding=self.encoding, errors=self.errors), 
             self.reader.next())