Commits

Richard Shea  committed 52f7602

Added a data loader script and a few mods to accommodate it

  • Participants
  • Parent commits 2fc1c0e

Comments (0)

Files changed (4)

 
 # Database   ===========================================
 db  
+data
 # Virtual env===========================================
 venv
 

File dcdemo/dataloader.py

+"""
+Populate data structures from the external files
+"""
+__author__ = "Richard Shea (rshea@thecubagroup.com)"
+__version__ = "$Revision: 0.1 $"
+__date__ = "$Date: 2013/08/18 10:00:00 $"
+__copyright__ = "Copyright (c) 2013 Richard Shea"
+
+from django.conf import settings
+def makeDatabasesSetting():
+    '''
+    Set up database configuration for this standalone script
+    '''
+
+    DBPATH = '''/home/rshea/dev/djchdemo/db/djchdemo.db'''
+    MYDB = {
+        'default': {
+            'ENGINE': 'django.db.backends.sqlite3', 
+            'NAME': DBPATH, 
+            'USER': '',
+            'PASSWORD': '',
+            'HOST': '',   
+            'PORT': '',  
+        }
+    }
+    settings.configure(DATABASES=MYDB)
+
+import pprint
+import csv
+makeDatabasesSetting()
+from dcdemo.apps.dmodrv.models import Country
+from dcdemo.apps.dmodrv.models import City   
+
+from django.conf import settings
+from decimal import Decimal
+
+# Path of the country data file; main() passes it to populateCountries
+COUNTRYDATAPATH = """/home/rshea/dev/djchdemo/data/wikipedia-iso-country-codes.csv"""
+# Path of the city data file; main() passes it to populateCities
+CITYDATAPATH = """/home/rshea/dev/djchdemo/data/worldcitiespop.txt"""
+# ISO country codes (in both lower- and upper-case form) that the loader
+# processes; input rows for any other country are skipped
+COUNTRIESTOPROCESS = ['au', 'AU', 'cn', 'CN', 'nz', 'NZ' ,'us', 'US']
+def removeLineFeed(line):
+    '''
+    Removes line feeds from the argument supplied
+    '''
+    line = line.replace("\r","")
+    line = line.replace("\n","")
+    return line
+
+def processCountryData(lineFromCountryDataFile, notUsed):
+    '''
+    Expects that the 'lineFromCountryDataFile' is two
+    element string delimited by a semi-colon. The first 
+    element is expected to be Country Name; the second
+    element is expected to an ISO country code. Both
+    elements are used to instantiate an instance of 
+    Country and this is returned to the caller
+    '''
+    lstData = lineFromCountryDataFile.split(":")
+    iso = lstData[-1]
+    lstData = lstData[0].split(",")
+    name = lstData[0]
+    if iso in COUNTRIESTOPROCESS:
+        c = Country(name=name.title(), isocode=iso.lower())
+        c.save()
+
+def populateAbstract(pathToData, lineProcessingFunction, lpfArg0=None):
+    '''
+    Opens file on specified path, ignores first line and then
+    iterates over all others invoking the 'lineProcessingFunction'
+    function each time with the line of input as an argument to it.
+    
+    The returned objects are saved into a list and at the end of
+    processing the input this list is returned to the caller
+    '''
+    lstOut = []
+    firstLine = True
+    for line in open(pathToData):
+        if firstLine:
+            firstLine = False
+        else:
+            line = removeLineFeed(line)
+            if line == None or len(line.strip()) < 1:
+                pass
+            else:
+                lstOut.append(lineProcessingFunction(line, lpfArg0))
+    return lstOut
+
+def populateCountries(pathToCountryData):
+    '''
+    Invokes populateAbstract to process the input data
+    defined in the file at 'pathToCountryData'
+    '''
+    populateAbstract(pathToCountryData, processCountryData)
+
+def populateCities(pathToCityData):
+    '''
+    Invokes populateAbstract to process the input data
+    defined in the file at 'pathToCityData'
+    '''
+
+    #Produce a list of City objects based upon the data contained
+    #in the file at pathToCityData
+    lstOut = []
+    with open(pathToCityData, 'rb') as f:
+        reader = csv.reader(f)
+        dicReader = csv.DictReader(f)
+        for dicRowFromCityDataFile in dicReader:
+            if dicRowFromCityDataFile['Country'] not in COUNTRIESTOPROCESS:
+                pass
+            else:
+                try:
+                    lstOut.append(processCityData(dicRowFromCityDataFile))
+                    if lstOut[-1] == None:
+                        del lstOut[-1]
+                    if len(lstOut) % 25000 == 0:
+                        print "Up to %d" % len(lstOut)
+                except csv.Error, e:
+                    sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))
+    
+    return lstOut
+
+def processCityData(dicRowFromCityDataFile):
+    '''
+    Expects that the 'dicRowFromCityDataFile' is a dictionary with
+    the following elements.
+
+    Country
+    City
+    AccentCity
+    Region
+    Population
+    Latitude
+    Longitude
+
+    All elements apart from AccentCity are used to instantiate
+    an instance of City and this is returned to the caller
+
+    This 'dicCountries' argument is a dictionary of Country objects
+    keyed by the ISO code for the relevant country
+
+    This function returns a City object
+    '''
+    if dicRowFromCityDataFile['Country'] not in COUNTRIESTOPROCESS:
+        pass
+    else:
+        coulist = Country.objects.filter(isocode=dicRowFromCityDataFile['Country'])
+        cou = coulist[0]
+        skipit = False
+        try:
+            if dicRowFromCityDataFile['Population'] == "":
+                pop = None
+            else:
+                pop=int(dicRowFromCityDataFile['Population'])
+        except:
+            skipit = True
+            print "Skipping for population"
+        try:
+            lng=Decimal(dicRowFromCityDataFile['Longitude'])
+        except:
+            skipit = True
+            print "Skipping for Longitude"
+            
+        try:
+            lat=Decimal(dicRowFromCityDataFile['Latitude'])
+        except:
+            skipit = True
+            print "Skipping for Latitude"
+
+        try:
+            name=unicode(dicRowFromCityDataFile['City'])
+        except:
+            skipit = True
+            print "Skipping for City Name"
+
+        if not skipit:
+            thisCity = City(country=cou, name=unicode(dicRowFromCityDataFile['City']), population=pop, \
+                        longitude=Decimal(dicRowFromCityDataFile['Longitude']), latitude=Decimal(dicRowFromCityDataFile['Latitude']))
+            thisCity.save()
+        else:
+            thisCity = None
+
+        return thisCity
+
+def main():
+    lstCountries = populateCountries(COUNTRYDATAPATH)
+    lstCities = populateCities(CITYDATAPATH) 
+if __name__ == '__main__':
+    main()

File dcdemo/dcdemo/apps/dmodrv/THISMIGHTBEUSEFUL.py

-class City(object):
-
-    def __repr__(self):
-    return "this is a test and so is this"
-    def __str__(self):
-        return "%s, %s . Pop: %d . Long %d, Lat %d" % \
-                (self.name, self.country.name, \
-                self.population, self.longitude, \
-                self.latitude)
-
-    def __init__(self, country, name, population, longitude, latitude):
-
-        self.__validateInitArgs(country, name, population, longitude, latitude)
-
-        self.country = country
-        self.name = name
-        if len(population.strip()) < 1:
-            self.population = None
-        else:
-            self.population = population
-        self.longitude = longitude
-        self.latitude = latitude
-
- ~/dev/countrycitydatasetexamples/CountCity/city.py
-
-
-#from CountCity import ccdexceptions
-import ccdexceptions as ccdexceptions
-class Country(object):
-
-    def __repr__(self):
-        return "Country('%s', '%s')" % \
-                (self.name, self.isocode)
-    def __str__(self):
-        return "%s (%s)" % \
-                (self.name, self.isocode)
-
-    def __init__(self, name, isocode):
-        self.__validateInitArgs(name, isocode)
-        self.name = name
-        self.isocode = isocode
-
-    def __validateInitArgs(self, name, isocode):
-        #Name must be non-blank
-        if name == None or len(name.strip()) < 1:
-            raise ccdexceptions.EmptyString("Country Name must not be blank")
-
-        #ISO Code must be exactly two characters
-        if isocode == None:
-            raise ccdexceptions.InvalidStringLength("ISO Code must two characters in length")
-        if len(isocode.strip()) == 2:
-            pass
-        else:
-            raise ccdexceptions.InvalidStringLength("ISO Code must two characters in length")
-~
-~
-
- ~/dev/countrycitydatasetexamples/CountCity/country.py
-
-

File dcdemo/dcdemo/apps/dmodrv/models.py

 
 class Country(models.Model):
     name = models.CharField("country name", max_length=200)
-    isocode = models.CharField("country ISO Code",  max_length=2)
+    isocode = models.CharField("country ISO Code",  max_length=2, unique=True)
 
     def __str__(self):
         return "%s (%s)" % \
 class City(models.Model):
     country = models.ForeignKey(Country)
     name = models.CharField("city Name", max_length=200)
-    population = models.IntegerField("population", default=0)
-    longitude = models.IntegerField("longitude", default=0)
-    latitude = models.IntegerField("latitude", default=0)
+    population = models.IntegerField("population", default=0, null=True, blank=True)
+    longitude = models.DecimalField("longitude", default=0, max_digits=9, decimal_places=5)
+    latitude = models.DecimalField("latitude", default=0, max_digits=9, decimal_places=5)
 
     def __str__(self):
-        return "%s, %s . Pop: %d . Long %d, Lat %d" % \
+        if self.population == None:
+            pop = "N/A"
+        else:
+            pop = "%d" % self.population
+        if self.longitude == None:
+            lng = "N/A"
+        else:
+            lng = "%f" % self.longitude
+        if self.latitude == None:
+            lat = "N/A"
+        else:
+            lat = "%f" % self.latitude
+
+        return "%s, %s . Pop: %s . Long %s, Lat %s" % \
                 (self.name, self.country.name, \
-                self.population, self.longitude, \
-                self.latitude)
+                pop, lng, \
+                lat)