Commits

Anonymous committed bd84511

Initial commit

Comments (0)

Files changed (18)

+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Programmer: Eric Parent
+#
+# mailto:eric.parent@gerad.ca
+#
+# (c) Eric Parent, 2009
+#
+
+"""
+@package NLTime
+
+Module containing classes for parsing and manipulating time info as
+can be found in NetLine 2005 itineraries files.
+
+"""
+
+import re
+import exceptions
+
+
+#-----------------------------------------------------
+#
+#  Class NLTime
+#
+#-----------------------------------------------------
+
+class NLTime():
+   """
+   """
+   MAX_NB_MINUTES = 1440 # = 23*60 + 59
+
+   def __init__(self, time_literal=""):
+      """
+      Constructor.
+      """
+      self.hours = 0
+      self.minutes = 0
+      self.nbMinutes = self.getNbOfMinutes(self.hours, self.minutes)
+      if time_literal:
+         if not self.__isValid(time_literal):
+            raise exceptions.ValueError, \
+                "Invalid value given to constructor of NLTime class."
+         self.literal = time_literal
+         self.hours, self.minutes, self.nbMinutes = self.__parse(self.literal)
+
+
+   def toLong(self, time_literal=""):
+      if time_literal:
+         if not self.__isValid(time_literal):
+            raise exceptions.valueError, \
+                "Invalid time literal given to NLTime."
+         self.literal = time_literal
+         hours, minutes, nbMinutes = self.__parse(time_literal)
+      return self.nbMinutes
+
+
+   def toLiteral(self, hours=None, minutes=None):
+      format = "%02d%02d"
+      if hours is not None and minutes is not None:
+         if not self.hoursWithinRange(hours) \
+                or not self.minutesWithinRange(minutes) \
+                or not self.__isValid(format % (hours, minutes,)):
+            raise exceptions.ValueError, "invalid format"
+         fields = (hours, minutes,)
+      else:
+         fields = (self.hours, self.minutes,)
+      return format % fields
+
+
+   def __parse(self, time_literal):
+      if not self.__isValid(time_literal):
+         raise exceptions.ValueError, 'Invalid format'
+      hours = int(time_literal[:2])
+      minutes = int(time_literal[2:])
+      nbMinutes = self.getNbOfMinutes(hours, minutes)
+      if not self.nbMinutesWithinRange(nbMinutes):
+         raise exceptions.ValueError, \
+             'Invalid time literal given to NLTime.'
+      return (hours, minutes, nbMinutes,)
+
+
+   def getNbOfMinutes(self, hours=0, minutes=0):
+      if self.hoursWithinRange(hours) \
+             and self.minutesWithinRange(minutes):
+         return hours*60 + minutes
+      else:
+         print "hours: %02d, minutes: %02d" % (hours, minutes,)
+         raise exceptions.ValueError, \
+             "Invalid numerical values given to NLTime.getNbOfMinutes()."
+
+
+   def nbMinutesWithinRange(self, nbMin):
+      max_value = 23 * 60 + 59
+      return self.__withinRange(nbMin, 0, NLTime.MAX_NB_MINUTES-1)
+
+
+   def hoursWithinRange(self, hours):
+      return self.__withinRange(hours, 0, 23)
+
+
+   def minutesWithinRange(self, minutes):
+      return self.__withinRange(minutes, 0, 59)
+
+
+   def __withinRange(self, value, minVal=0, maxVal=59):
+      if value <= maxVal and value >= minVal:
+         return True
+      return False
+
+
+   def __isValid(self, time_literal):
+      """
+      Expects the time literal to be of the form '0745' for 7:45 AM
+      and '1642' for 4:42 PM.
+
+      @param time_literal a string representing a time literal as
+      described previously.
+
+      @return a boolean answer.
+      """
+      expression = r'[0-9]{4}'
+      pattern = re.compile(expression)
+      if pattern.match(time_literal):
+         return True
+      return False
+
+
+   def __add__(self, other):
+      nbMins = (self.nbMinutes + other.nbMinutes) % NLTime.MAX_NB_MINUTES
+      if nbMins < 0: nbMins += NLTime.MAX_NB_MINUTES
+      return self.__copyFromNbMins(nbMins)
+
+
+   def __sub__(self, other):
+      nbMins = (self.nbMinutes - other.nbMinutes) % NLTime.MAX_NB_MINUTES
+      if nbMins < 0: nbMins += NLTime.MAX_NB_MINUTES
+      return self.__copyFromNbMins(nbMins)
+
+
+   def __copyFromNbMins(self, nbMins):
+      hours = nbMins / 60
+      minutes = nbMins % 60
+      return NLTime(self.toLiteral(hours, minutes))
+
+
+   def toString(self):
+      return self.toLiteral()
+
+
+   def __str__(self):
+      return self.toString()
+
+
+   def __eq__(self, other):
+      return self.toLong() == other.toLong()
+
+
+#
+#-- End of "class NLTime"
+
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Programmer: Eric Parent
+#
+# mailto:eric.parent@gerad.ca
+#
+# (c) Eric Parent, 2009
+#
+
+"""
+@package NetLine05
+
+Module containing classes for parsing and manipulating NetLine 2005
+itineraries files.
+
+"""
+
+import os
+import sys
+import exceptions
+import fileinput
+import getopt
+import utils
+from PIG_AnalysisTools import *
+from NLTime import NLTime
+
+
+#-----------------------------------------------------
+#
+#  Class NL05Line
+#
+#-----------------------------------------------------
+
+class NL05Line():
+   """
+   This class allows the parsing and manipulation of data
+   contained on one line of NetLine2005 itineraries file.
+
+   Since itineraries with more than 5 legs are considered to be
+   marginals, they are not parsed by this program.
+
+   """
+
+   #--- Constants ---#
+   Flight1 = 0
+   Flight2 = 1
+   Flight3 = 2
+   Origin = 3
+   Connection1 = 4
+   Connection2 = 5
+   Destination = 6
+   DepartureTime = 7
+   ArrivalTime = 8
+   ElapsedTime = 9
+   Plane1 = 10
+   Plane2 = 11
+   Plane3 = 12
+   DayOfWeek = 13
+   Base = 14
+   Dem = 15
+   Rev = 16
+   NbOfLegs = 17
+   Leg1 = 18
+   Leg2 = 19
+   Leg3 = 20
+   Leg4 = 21
+   Leg5 = 22
+   Spill = 23
+   Rec = 24
+   Mark = 25
+   AR1 = 26
+   DE1 = 27
+   AR2 = 28
+   DE2 = 29
+   AIRPORTS = 30
+   LineLength = 31
+   VoidLeg = 'ZZZZZZXXX9999'
+   VoidTime = '0000'
+   NaN = -9999
+   NoAirport = 'ZZZ'
+   Demand = Dem
+
+
+   def __init__(self, line):
+      """
+      Constructor.
+
+      This takes as input a line read from a NetLine (2005) itinerary
+      file and parses its content.
+      """
+      self.line = range(NL05Line.LineLength)
+      self.__parseLine(line)
+      self.content = range(NL05Line.LineLength)
+      key, itinerary = self.itineraryFromLine()
+      self.key = self.getKey()
+      self.itinerary = itinerary
+
+
+   def originDestination(self):
+      """
+      Returns the origin/destination string.
+
+      Such string looks like this: 'Airport_YUL/Airport_YVR'
+      """
+      return self.getKey()
+
+
+   def getDemand(self):
+      """
+      Returns the float corresponding to the demand associated with this
+      itinerary.
+
+      Returns a float corresponding to the demand being satisfied by
+      this itinerary.
+      """
+      return float(self.line[NL05Line.Dem])
+
+
+   def __flightIndex(self, nb):
+      if nb == 1:
+         return NL05Line.Flight1
+      elif nb == 2:
+         return NL05Line.Flight2
+      elif nb == 3:
+         return NL05Line.Flight3
+      else:
+         return -1
+
+
+   def __getitem__(self, i):
+      """
+      """
+      if self.line:
+         return self.line[i]
+      else:
+         raise "ERROR"
+         return None
+
+
+   def rawLine(self):
+      return self.raw_line
+
+
+   def __parseLine(self, line):
+      """
+      Lit le contenu d'une line d'un fichier d'itinéraires et le stocke
+      dans un dictionnaire.
+      
+      Retourne le dictionnaire renseigné.
+      """
+      debut = 0
+      fin = 8
+
+      for i in range(1,4):
+         self.line[self.__flightIndex(i)] = line[debut:fin].strip()
+         debut = fin
+         fin = fin + 8
+
+      self.line[NL05Line.Origin] = line[24:27].strip()
+      self.line[NL05Line.Connection1] = line[27:31].strip()
+      self.line[NL05Line.Connection2] = line[31:35].strip()
+      self.line[NL05Line.Destination] = line[35:39].strip()
+      self.line[NL05Line.DepartureTime] = line[39:44].strip()
+      self.line[NL05Line.ArrivalTime] = line[44:49].strip()
+      self.line[NL05Line.ElapsedTime] = line[49:54].strip()
+      self.line[NL05Line.Plane1] = line[54:58].strip()
+      self.line[NL05Line.Plane2] = line[58:62].strip()
+      self.line[NL05Line.Plane3] = line[62:66].strip()
+      self.line[NL05Line.DayOfWeek] = line[66:69].strip()
+      self.line[NL05Line.Base] = line[69:76].strip()
+      self.line[NL05Line.Dem] = line[76:82].strip()
+      self.line[NL05Line.Rev] = line[82:89].strip()
+      self.line[NL05Line.NbOfLegs] = line[89:92].strip()
+      self.line[NL05Line.Leg1] = line[92:105].strip()
+      self.line[NL05Line.Leg2] = line[105:119].strip()
+      self.line[NL05Line.Leg3] = line[119:133].strip()
+      self.line[NL05Line.Leg4] = line[133:147].strip()
+      self.line[NL05Line.Leg5] = line[147:161].strip()
+      self.line[NL05Line.Spill] = line[161:169].strip()
+      self.line[NL05Line.Rec] = line[169:175].strip()
+      self.line[NL05Line.Mark] = line[175:181].strip()
+      self.line[NL05Line.AR1] = line[181:186].strip()
+      self.line[NL05Line.DE1] = line[186:191].strip()
+      self.line[NL05Line.AR2] = line[191:196].strip()
+      self.line[NL05Line.DE2] = line[196:201].strip()
+      self.legs = [self.line[NL05Line.Leg1], \
+                 self.line[NL05Line.Leg2], \
+                 self.line[NL05Line.Leg3], \
+                 self.line[NL05Line.Leg4], \
+                 self.line[NL05Line.Leg5]]
+      self.line[NL05Line.AIRPORTS] = self.visitedAirports(self.legs)
+
+      nbOfLegs = int(self.line[NL05Line.NbOfLegs])
+      if nbOfLegs > 5:
+         self.line = None
+      self.raw_line = line
+
+
+   def connectionTimes(self):
+      """
+      Gets the connection times at all the stations.
+
+      >> L = NL05Line(line_of_text)
+      >> print L.connectionTimes()
+      [ ('YUL', 46), ('YYC', 71) ]
+      >>
+
+      @return a list of tuples in the form 
+      """
+      connTimes = []
+      nb_legs = int(self.line[NL05Line.NbOfLegs])
+      # if there is only one leg in the itinerary, there is obviously
+      # no connection time. Only consider when there is more than one
+      # leg.
+      if nb_legs > 1:
+         time_fields = self.timeFields(nb_legs)
+         for i in range(nb_legs - 1):
+            # legs are numbered from 1 to 4 inclusively, hence an
+            # offset of 1 is needed.
+            idx = self.leg_index(i+1)
+            leg_desc = self.line[idx]
+            station = leg_desc[3:6]
+            arrivalTime = NLTime(time_fields[i][1])
+            departureTime = NLTime(time_fields[i+1][0])
+            if not (arrivalTime == departureTime):
+               waiting = departureTime - arrivalTime
+               duration = waiting.toLong()
+               connTimes.append( (station, duration,) )
+      return connTimes
+
+
+   def airportName(self, airportCode):
+      """Returns the airport name as used in FlightScheduler module.
+
+      For example, if airport code is 'YUL', the returned airport name
+      is 'Airport_YUL'.
+      """
+      #return 'Airport_' + airportCode
+      return airportCode
+   
+
+   def timeFields(self, nb_legs):
+      indexes = self.timeFieldsIndexes(nb_legs, str_format=False)
+      _fields = []
+      for j, k in indexes:
+         _fields.append((self.line[j], self.line[k],))
+      return _fields
+
+
+   def timeFieldsIndexes(self, nb_of_legs, str_format=False):
+      f = int
+      if str_format:
+         f = str
+      n = int(nb_of_legs)
+      if n == 1:
+         return ((f(NL05Line.DepartureTime),
+                  f(NL05Line.ArrivalTime)), )
+      elif n == 2:
+         return ((f(NL05Line.DepartureTime), f(NL05Line.AR1)),
+                 (f(NL05Line.DE1), f(NL05Line.ArrivalTime)))
+      elif n == 3:
+         return ((f(NL05Line.DepartureTime), f(NL05Line.AR1)),
+                 (f(NL05Line.DE1), f(NL05Line.AR2)),
+                 (f(NL05Line.DE2), f(NL05Line.ArrivalTime)))
+      elif n == 4:
+         return ((f(NL05Line.DepartureTime), f(NL05Line.AR1)),
+                 (f(NL05Line.DE1), f(NL05Line.AR2)),
+                 (f(NL05Line.DE2), f(NL05Line.ArrivalTime)), (0, 0))
+      else:
+         return (None, None)
+
+
+   def leg_index(self, number):
+      if number < 1 or number > 5:
+         raise exceptions.ValueError, "Invalid number given for leg index."
+      if number == 1:
+         return NL05Line.Leg1
+      elif number == 2:
+         return NL05Line.Leg2
+      elif number == 3:
+         return NL05Line.Leg3
+      elif number == 4:
+         return NL05Line.Leg4
+      else:
+         return NL05Line.Leg5
+
+
+   def legTags(self, line, legNb):
+      """Format of leg name is 'Leg_AC_8300_YEG_7'."""
+      if legNb < 1 or legNb > 4:
+         raise exceptions.ValueError, 'Invalid index for flight leg.'
+      nbOfLegs = int(line[NL05Line.NbOfLegs])
+      dayOfWeek = int(line[NL05Line.DayOfWeek])
+      times = self.timeFieldsIndexes(nbOfLegs)
+      times_list = []
+      for u in times:
+         times_list.extend(u)
+      # retrieve the leg information
+      legsStrings = []
+      prev_departure_time = 0
+      for i in range(nbOfLegs):
+         _leg = line[self.leg_index(i+1)]
+         if _leg == NL05Line.VoidLeg:
+            raise ValueError, "Invalid leg descriptor found."
+         dep_airport = _leg[:3].strip()
+         arr_airport =  _leg[3:6].strip()
+         airline = _leg[6:8].strip()
+         flight_nb = _leg[8:].strip()
+         (dep_time, arr_time) = times[i]
+         day_offset = self.weekDayOffset(times_list[:(i+1)*2])
+         # Use of modulo conversion so we stay in the range of
+         # 1 to 7 days (Monday to Sunday, inclusively)
+         #
+         departure_day = (dayOfWeek + day_offset) % 7
+         if departure_day == 0: departure_day = 7
+         legsStrings.append( "_".join(["Leg",
+                                       airline,
+                                       flight_nb,
+                                       dep_airport,
+                                       str(departure_day)]) )
+      return legsStrings
+
+
+   def weekDayOffset(self, times):
+      """Returns the weekday offset (int) due to time runaround the
+      clock. Recursive function."""
+      if not times or len(times) == 1:
+         return 0
+      delta = (int(times[-1]) < int(times[-2])) and 1 or 0
+      return delta + self.weekDayOffset(times[:-1])
+
+
+   def listOfLegs(self):
+      legs = []
+      nbOfLegs = self.line[NL05Line.NbOfLegs]
+      if nbOfLegs == '':
+         raise exception.ValueError, 'Invalid value for NbOfLegs!!!'
+      return self.legTags(self.line, int(nbOfLegs))
+      
+
+   def itineraryFromLine(self):
+      """Computes the itinerary with leg tags as generated by the
+      FlightScheduler program, e.g.
+
+      'Leg_AC_8300_YEG_5', 'Leg_AC_120_YUL_6', ...
+      """
+      return (self.getKey(), ', '.join(self.listOfLegs()))
+
+
+   def getItinerary(self):
+      """Returns the itinerary computed when the class was
+      instantiated."""
+      return self.itinerary
+
+
+   def getKey(self):
+      """Returns the key for the itinerary storage purposes."""
+      orig = self.airportName(line[NL05Line.Origin])
+      dest = self.airportName(line[NL05Line.Destination])
+      return "/".join([orig, dest])
+
+
+   def keyAndItinerary(self):
+      """Returns a tuple consisting of the key and the itinerary:
+      (key, itinerary)
+
+      The key has the form 'Airport_YUL/Airport_YYC' and describes the
+      origin destination airports.
+      """
+      return (self.key, self.itinerary)
+
+
+   def visitedAirports(self, legs):
+      """Returns the list of airports 3-letter ID (IATA) that were
+      visited the flight."""
+      n = len(legs)
+      iata = []
+      # At least, a flight is made of one leg...
+      iata.append(legs[0][0:3])
+      for i in range(n):      
+         leg = legs[i]
+         if leg != NL05Line.VoidLeg:
+            iata.append(leg[3:6])
+         else:
+            iata.append(NL05Line.NoAirport)
+      return iata
+
+
+   def getKey(self):
+      """
+      """
+      orig = self.airportName(self.line[NL05Line.Origin])
+      dest = self.airportName(self.line[NL05Line.Destination])
+      return self.buildKey(orig, dest)
+
+
+   def buildKey(self, orig, dest):
+      """
+      Builds the key for retrieval in a database, based on the names
+      designated by orig and dest.
+      """
+      return orig + "/" + dest
+
+
+   def flightTag(self, line, flightText):
+      """
+      Returns the flight tag in this format "AC_4320".
+      """
+      flightInfo = flightText.split(' ')
+      flightInfo = [f for f in flightInfo if len(f)>0]
+      return "_".join(flightInfo)
+
+
+   def indexString(self, index):
+      """ Returns the string representation of a Line index."""
+      if index == NL05Line.Flight1:
+         return "Flight1"
+      elif index == NL05Line.Flight2:
+         return 'Flight2'
+      elif index == NL05Line.Flight3:
+         return 'Flight3'
+      elif index == NL05Line.Origin:
+         return 'Origin'
+      elif index == NL05Line.Connection1:
+         return 'Connection1'
+      elif index == NL05Line.Connection2:
+         return 'Connection2'
+      elif index == NL05Line.Destination:
+         return 'Destination'
+      elif index == NL05Line.DepartureTime:
+         return 'DepartureTime'
+      elif index == NL05Line.ArrivalTime:
+         return 'ArrivalTime'
+      elif index == NL05Line.ElapsedTime:
+         return 'ElapsedTime'
+      elif index == NL05Line.Plane1:
+         return 'Plane1'
+      elif index == NL05Line.Plane2:
+         return 'Plane2'
+      elif index == NL05Line.Plane3:
+         return 'Plane3'
+      elif index == NL05Line.DayOfWeek:
+         return 'DayOfWeek'
+      elif index == NL05Line.Base:
+         return 'Base'
+      elif index == NL05Line.Dem:
+         return 'Dem'
+      elif index == NL05Line.Rev:
+         return 'Rev'
+      elif index == NL05Line.NbOfLegs:
+         return 'NbOfLegs'
+      elif index == NL05Line.Leg1:
+         return 'Leg1'
+      elif index == NL05Line.Leg2:
+         return 'Leg2'
+      elif index == NL05Line.Leg3:
+         return 'Leg3'
+      elif index == NL05Line.Leg4:
+         return 'Leg4'
+      elif index == NL05Line.Leg5:
+         return 'Leg5'
+      elif index == NL05Line.Spill:
+         return 'Spill'
+      elif index == NL05Line.Rec:
+         return 'Rec'
+      elif index == NL05Line.Mark:
+         return 'Mark'
+      elif index == NL05Line.AR1:
+         return 'AR1'
+      elif index == NL05Line.DE1:
+         return 'DE1'
+      elif index == NL05Line.AR2:
+         return 'AR2'
+      elif index == NL05Line.DE2:
+         return 'DE2'
+      elif index == NL05Line.AIRPORTS:
+         return 'AIRPORTS'
+      elif index == NL05Line.LineLength:
+         return 'LineLength'
+      elif index == NL05Line.VoidLeg:
+         return 'VoidLeg'
+      elif index == NL05Line.VoidTime:
+         return 'VoidTime'
+      else:
+         return 'BAD VALUE!!!'
+
+#
+#-- End of "class NL05Line"
+
+
+#-----------------------------------------------------
+#
+#  Class NL05Data
+#
+#-----------------------------------------------------
+
+class NL05Data():
+   """
+   This class implements the parsing of a NetLine2005
+   itineraries file.
+   """
+
+   def __init__(self, file=None):
+      """
+      Constructor.
+
+      Optionally accepts a file to be parsed. This file is assumed to
+      be an itineraries file in the NetLine (2005) format.
+      """
+      self.file = file
+      self.lines = []
+      self.content = {}
+
+
+   def matchingLines(self, criterion):
+      """
+      Returns a list of lines that are matching a specific criterion.
+      """
+      lines = []
+      if not utils.isValidFile(self.file):
+         raise exceptions.IOError, \
+             'Invalid file. Please provide with a valid file.'
+      m = len(criterion)
+      # Scan the entife file, one line at the time
+      for line in fileinput.input(self.file):
+         # Parsing of the line by a NL05Line object
+         nl = NL05Line(line)
+         V = True
+         for i in range(m):
+            V = V and (nl[criterion[i][0]] == criterion[i][1]) or False
+         if V:
+            lines.append(line)
+
+      return sorted(lines)
+
+
+   def processFile(self, file=None):
+      """
+      Processes the specified file in argument.
+
+      If the file is not valid, throws an IOError.
+      """
+      print '-> Reading the NetLine itineraries file:'
+      print os.path.abspath(file)
+      count = 0
+      if not file or file is None:
+         file = self.file
+      else:
+         self.file = file
+      if not utils.isValidFile(file):
+         raise exceptions.IOError, \
+             'Invalid file. Please provide with a valid file.'
+      # Process the entire file, one line at the time
+      self.demandDict = {}
+      for line in fileinput.input(file):
+         if line:
+            # Parsing of the line by a NL05Line object
+            nl = NL05Line(line)
+            self.lines.append(nl)
+            count = count + 1
+
+            # Retrieving the key and itinerary from the parser object
+            key, itin = nl.keyAndItinerary()
+            self.demandDict[itin] = nl.getDemand()
+            if itin is not None and key is not None:
+               # Updating the self.content dictionnary
+               if key not in self.content:
+                  self.content[key] = [itin]
+               else:
+                  self.content[key].append(itin)
+                  x = self.content[key]
+                  self.content[key] = list(set(x))
+      print 'Read %d lines in the input file.' % count
+      print
+
+
+   def demandDictionnary(self):
+      return self.demandDict.copy()
+
+
+   def displayContent(self):
+      """
+      Displays the content of the NL05Data class (after an itinerary
+      file was parsed).
+      """
+      print '--------------------------------'
+      print 'Printout of the NL05Data object.'
+      print
+      for key in self.content.keys():
+         print str(key)
+         print
+         for itin in self.content[key]:
+            print str(itin)
+         print
+         print
+      print '--------------------------------'
+      print
+      print 'Content is clustered into ' + str(len(self.content)),
+      print ' categories (or keys).'
+      count = self.__countItineraries(self.content)
+      print 'A total of ' + str(count) + ' itineraries are listed.'
+
+
+   def connectingTimes(self):
+      """
+      Returns a dictionnary with the minimum and maximum connection
+      time for any given airport.
+
+      example:
+      > nlData = NL05Data()
+      > nlData.processFile(fileName)
+      > d = nlData.connectingTimes()
+      > print d["YUL"]
+      (45, 122)
+
+      NOTE: The values showned are arbitrary and do not reflect
+      reality.
+
+      """
+      connTimes = {}
+      MIN_POS = 0
+      MAX_POS = 1
+      for nl_line in self.lines:
+         for (station, connTime) in nl_line.connectionTimes():
+            if not connTimes.has_key(station):
+               connTimes[station] = (connTime, connTime,)
+            min_time = min(connTimes[station][MIN_POS],connTime)
+            max_time = max(connTimes[station][MAX_POS],connTime)
+            connTimes[station] = (min_time, max_time,)
+      return connTimes
+
+
+   def hasItinerary(self, airports, itin):
+      """
+      Tells if the itinerary, for the given airports pair, is present
+      in the data.
+      Returns True if data is found, False if not.
+      """
+      if airports not in self.content:
+         return False
+      if itin not in self.content[airports]:
+         return False
+      return True
+
+
+   def itinerariesDictionnary(self):
+      """Returns the itineraries dictionnary.
+
+      The key is the Origin/Destination description string and the
+      itinerary is a list fo strings, each string in the list being an
+      itinerary.
+
+      For example:
+
+      >> d = NL05Data()
+      >> d.processFile('itineraries.in')
+      >> itins = d.itinerariesDictionnary()
+      >> for key in itins:
+      ...    for itinerary in itins[key]:
+      ...        print str(itinerary)
+
+      (snippet)
+
+      >> p = itins['Airport_YVR/Airport_YUL']
+      >> len(p)
+      ...
+
+      """
+      return self.content.copy()
+
+
+   def printItineraries(self, itineraries):
+      """
+      Print the specified itineraries (dictionnary format) to the
+      standard output.
+
+      Input: itineraries, a dictionnary containing the itineraries
+      (list of strings) grouped by origin/destination (key).
+
+      """
+      for k in itineraries.keys():
+         print k
+         for i in range(len(itineraries[k])):
+            print itineraries[k][i]
+         print
+
+
+   def __countItineraries(self, data):
+      """
+      Returns the number of all itineraries in the dictionnary.
+      """
+      count = 0
+      for key in data.keys():
+         count = count + len(data[key])
+      return count
+#
+#-- End of "class NL05Data"
+
+
+#-----------------------------------------------------
+#
+#  Script's specific functions and methods
+#
+#-----------------------------------------------------
+
+def argumentFile(file=None):
+   """
+   Gets the itinerary file (*.in) given in argument, makes
+   some routine checks about this file, and returns it.
+
+   Throws an Exception
+   """
+   if file is None:
+      file = sys.argv[1]
+   if not os.path.exists(file):
+      raise exceptions.IOError, 'File to be parsed does not exists. '\
+      'Please give a valide file to be parsed.'
+   if not os.path.isfile(file):
+      raise exceptions.IOError, 'File given is not a file. '\
+          'Please indicate a file to be parsed.'
+   return file
+
+
+def findMatchingLines():
+   try:
+      file = argumentFile()
+      if not utils.isValidFile(file):
+         raise exceptions.IOError, 'Invalid file.'
+   except exceptions.IOError:
+      print 'Please provide with a valid file.'
+      return -1
+   print 'File being treated: ' + str(file)
+
+   # Instantiate a NL05Data object to manipulate
+   # the NL05 data.
+   #
+   nlData = NL05Data(file)
+   criterion = [(NL05Line.Origin, 'YYZ'), (NL05Line.Destination, 'YVR')]
+   for item in nlData.matchingLines(criterion):
+      print str(item)
+   return 0
+   
+
+def inputFile(file=None):
+   try:
+      file = argumentFile(file)
+      if not utils.isValidFile(file):
+         raise exceptions.IOError, 'Invalid file.'
+   except exceptions.IOError:
+      print 'Please provide with a valid file.'
+      return -1
+   print 'File being treated: ' + str(file)
+   return file
+
+
+def main(arguments):
+   """
+   Proceed with the extraction of information in an itinerary file
+   (*.in) from NetLine (2005).
+   """
+   itinFile = arguments['itinFile']
+   # Instantiate a NL05Data object to manipulate
+   # the NL05 data.
+   #
+   nlData = NL05Data(itinFile)
+   nlData.processFile(itinFile)
+   if arguments['connections']:
+      print "Proceeding with compilation of connection times..."
+      conn_times = nlData.connectingTimes()
+      utils.saveDict("connectionTimes.txt", conn_times)
+   else:
+      # Ask the NL05Data object to load the itineraries from
+      # the designated file.
+      #
+      print "Retrieving the itineraries dictionnary."
+      itins = nlData.itinerariesDictionnary()
+   return 0
+
+
+def testNL05Data(args_dict):
+   import os.path
+   netlineFile = args_dict["itinFile"]
+   if not os.path.isfile(netlineFile):
+      raise exceptions.ValueError, "Please provide with a valide file."
+   netLineData = NL05Data()
+   netLineData.processFile(netlineFile)
+   netline_itins = netLineData.itinerariesDictionnary()
+   print "-- Testing NL05Data object --"
+   #netLineData.displayContent()
+   #itins = netline_itins.values()
+   #for itin in itins:
+   #   print itin
+   #print "-----------------------------"   
+   #print "Number of itineraries read = %d" % len(itins)
+
+
+def testNL05Line(args_dict):
+   import os.path
+   nl_file = args_dict["itinFile"]
+   if not os.path.isfile(nl_file):
+      raise exceptions.ValueError, "Please provide with a valide file."
+      return -1
+   count = 0
+   for line in fileinput.input(nl_file):
+      nl = NL05Line(line)
+      print nl.itinerary
+      count += 1
+   print "Itineraries read: " + str(count)
+
+
+def usage():
+    """
+    Describes the usage of this script, with arguments descriptions.
+    """
+    print 'Please use one of the following arguments in this manner:'
+    print
+    print ' --itin-file'
+    print '     File containing the itineraries to parse.'
+    print
+    print ' --demand, -d'
+    print '     Saves the computed total demand information into'
+    print '     the file specified by this option.'
+    print
+    print ' --verbose, -v'
+    print '     Display all the total demands information'
+    print '     to the standard output.'
+    print
+    print ' --connections, -c'
+    print '     Compiles the connection times for each airport (max and min)'
+    print '     and saves that info into a text file.'
+    print
+    print ' --test, -t'
+    print '     Tests the NL05Line class on a specified itineraries file.'
+    print
+    print ' --help, -h'
+    print '     Displays this message.'
+    print
+    return 0
+
+
+def parseArguments(inputArgs):
+   """
+   Parses the given list of arguments (string) given by inputArgs and
+   returns a dictionnary with  corresponding key/values.
+   """
+   options = 'itin-file= help demand= verbose test connections'
+   optionsList = options.split()
+   shortOptions = 'hdtvc'   
+   try:
+      optlist, args = getopt.getopt(inputArgs, shortOptions, optionsList)
+   except getopt.GetoptError, err:
+      print str(err)
+      usage()
+      sys.exit(2)
+   if not optlist:
+      usage()
+      sys.exit(2)
+   arguments = {}
+   arguments['itinFile'] = None
+   arguments['verbose'] = False
+   arguments['connections'] = False
+   arguments['testing'] = False
+   for option, answer in optlist:
+      if option in ("-h", "--help"):
+         sys.exit(usage())
+      elif option in ("-v", "--verbose"):
+         arguments['verbose'] = True
+      elif option in ("-c", "--connections"):
+         arguments['connections'] = True
+      elif option == '--itin-file':
+         arguments['itinFile'] = answer
+      elif option in ('--test', '-t'):
+         arguments['testing'] = True
+      else:
+         print "Option %s not recognized..." % option
+         assert False, "unhandled option"
+   return arguments
+
+
+#------------------------------------------------------------#
+if __name__ == "__main__" : 
+   args = parseArguments(sys.argv[1:])
+   if args["testing"]:
+      #testNL05Line(args)
+      testNL05Data(args)
+   else:
+      sys.exit(main(args))
+
+#------------------------------------------------------------#

PIG_AnalysisTools.py

+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Programmer: Eric Parent
+#
+# mailto:eric.parent@gerad.ca
+#
+# (c) Eric Parent, 2009
+#
+
+
+"""
+@package PIG_AnalysisTools
+
+This module provides various capabilities for parsing information
+related to the PassengerItineraryGenerator (PIG) program.
+"""
+
+
+import os
+import sys
+import getopt
+import fileinput
+import re
+import subprocess
+import pdb
+import utils
+import NetLine05
+
+
+#-----------------------------------------------------------------
+#
+#  Script's methods and attributes
+#
+#-----------------------------------------------------------------
+
+
+# Debugging switch for the pdb.set_trace() calls.
+DEBUG = False   ## Set this to False to disable the pdb.set_trace() calls.
+
+
+# Name of the file read by sampleAirportsShortInfo(), one entry per line.
+sample_file = "sample.txt"
+
+def sampleAirportsShortInfo():
+    """
+    Returns a list of (O,D) part of the sample data.
+    The list has strings in the following form (for example):
+
+    ['YUL, CDG',
+     'YYZ, LHR',
+     'BWI, YVR']
+
+    """
+    info = []
+    for line in fileinput.input(sample_file):
+        info.append(line.strip())
+    return info
+
+
+def loadSampleInfoTuples():
+    """
+    Returns a list of (O,D) tuples making the sample data.
+    The list has the following form (for example):
+
+    [('YUL','CDG'),
+     ('YYZ','LHR'),
+     ('BWI','YVR')]
+    
+    """
+    info = []
+    for line in sampleAirportsShortInfo():
+        (origin, destination) = line.split(', ')
+        info.append(tuple([origin, destination]))    
+    return info
+
+
+def sampleAirportsInfo():
+    """
+    Returns a list of (O,D) part of the sample data.
+    The list has strings in the following form (for example):
+
+    ['Airport_YUL/Airport_CDG',
+     'Airport_YYZ/Airport_LHR',
+     'Airport_BWI/Airport_YVR']
+
+    """
+    airports = []
+    ##prefix = 'Airport_'
+    prefix = ''
+    for (orig, dest) in loadSampleInfoTuples():
+        airports.append("/".join([prefix+orig, prefix+dest]))
+    return airports
+
+
+
+#-----------------------------------------------------------------
+#
+#  Class Comparator
+#
+#-----------------------------------------------------------------
+
+class Comparator():
+    """This class implements utility functions for the comparison of
+    passenger itineraries generated by the PIG module (part of
+    FlightScheduler aplication).
+
+    This class needs a shelve file which represents the reference data
+    against which the generated data is performed."""
+
+    def __init__(self):
+        """Constructor"""
+        self.generatedItinLoaded = False
+        self.shelveFileLoaded = False
+        self.PIGdata = {}
+        self.NLdata = {}
+        self.addressedDemand = {}
+
+
+    def countItineraries(self, data):
+        """Counts the itineraries in the data dict and returns an
+        integer."""
+        count = 0
+        for key in data.keys():
+            count += len(data[key])
+        return count
+
+
+    def generatedAsList(self):
+        """Returns the generated itineraries as a list."""
+        L = []
+        for key in self.PIGdata.keys():
+            L.extend(self.PIGdata[key])
+        L = list(set(L))
+        return L
+
+
+    def referenceAsList(self):
+        """Returns the reference itineraries in a list."""
+        L = []
+        for key in self.NLdata.keys():
+            L.extend(self.NLdata[key])
+        L = list(set(L))
+        return L
+
+
+    def referenceItineraries(self):
+        """Returns a copy of the reference itineraries dictionnary
+        (with keys being the Origin/Destination descriptor)."""
+        return self.NLdata.copy()
+
+
+    def intersection(self):
+        """Returns the intersection of the PIG itineraries and the
+        reference itineraries."""
+        in_both = {}
+        for key in self.PIGdata.keys():
+            if self.NLdata.has_key(key):
+                pig = set(self.PIGdata[key])
+                nl = set(self.NLdata[key])
+                # compute the intersection
+                in_both[key] = list(pig & nl)
+        return in_both
+
+
+    def difference(self, A, B):
+        """Returns the difference (set operation) of the PIG
+        itineraries and the reference itineraries."""
+        in_both = {}
+        for key in A.keys():
+            if B.has_key(key):
+                setA = set(A[key])
+                setB = set(B[key])
+                # compute the intersection
+                in_both[key] = list(setA - setB)
+        return in_both
+
+
+    def notInRef(self):
+        """Returns a dict containing the itineraries generated by PIG
+        but that were not found in the reference data set."""
+        extra = {}
+        for key in self.PIGdata.keys():
+            if self.NLdata.has_key(key):
+                pig = set(self.PIGdata[key])
+                nl = set(self.NLdata[key])
+                # compute the difference pig \ nl
+                extra[key] = list(pig - nl)
+        return extra
+
+
+    def notGenerated(self):
+        """Returns a dict containing the itineraries in the ref data
+        and that were not generated by the PIG module."""
+        forgot = {}
+        for key in self.PIGdata.keys():
+            if self.NLdata.has_key(key):
+                pig = set(self.PIGdata[key])
+                nl = set(self.NLdata[key])
+                # compute the difference nl \ pig
+                forgot[key] = list(nl - pig)
+        return forgot
+
+
+    def printItineraries(self, itins):
+        for key in itins.keys():
+            print
+            print str(key)
+            print
+            for item in itins[key]:
+                print str(item)
+        print
+
+#
+#--- End of class Comparator
+
+
+#-----------------------------------------------------------------
+#
+#  Class DemandCompiler
+#
+#  Class compiling all the addressed demand by the generated
+#  itineraries and compares that to NetLine 2005 itineraries.
+#
+#-----------------------------------------------------------------
+
+class DemandCompiler():
+    """This class is compiling all the addressed demand by the
+    generated itineraries and compares that to NetLine 2005
+    itineraries.
+    """
+    
+    def __init__(self, params):
+        """Default constructor.
+        Post-condition: loaded these private fields:
+           __pigFile
+           __netlineFile"""
+        # Extract information from "params"
+        self.__pigFile = params['pig_file']
+        self.__NetLineFile = params['netline_file']
+
+        # Process itineraries information
+        # ... retrieve the netlinePerOdItineraries
+        # ... retrieve the netlinePerItinDemand
+        #
+        self.processNetLineFile(self.__NetLineFile)
+
+        # Process the PIG file
+        # ... retrieve the pigPerOdItineraries
+        #
+        self.processPIGFile(self.__pigFile)
+
+        # Do not treat ALL the (O,D) pairs; only those PIG
+        # generated itineraries for.
+        #
+        print "Processing the PIG per OD itineraries keys...",
+        self.ODs = self.pigPerOdItineraries.keys()
+        print "...done"
+        #for od in self.ODs:
+        #    print "OD = %s" % str(od)
+
+
+    def processNetLineFile(self, netlineFile):
+        """Reads a NetLine (2005) itineraries file and loads the
+        demand information.
+
+        POST-CONDITIONS:
+        - Saved the per Origin/Destination information in a file for
+        later references.
+        """
+        self.__nlDataObject = NetLine05.NL05Data()
+        self.__nlDataObject.processFile(self.__NetLineFile)
+        self.netlinePerItinDemand \
+            = self.__nlDataObject.demandDictionnary()
+        self.netlinePerOdItineraries \
+            = self.__nlDataObject.itinerariesDictionnary()
+
+
+    def printBelowRatioThreshold(self, threshold=85):
+        netlineItins = self.netlinePerOdItineraries
+        pigItins = self.pigPerOdItineraries
+        demand_ratio = self.addressedDemandRatio(self.totalDemand(),
+                                                 self.addressedDemand())
+        for OD in demand_ratio:
+            if demand_ratio[OD] < threshold:
+                print "--- %s ---" % OD
+                if netlineItins.has_key(OD):
+                    print "(Netline)"
+                    for itin in netlineItins[OD]:
+                        print itin
+                if pigItins.has_key(OD):
+                    print "(PIG)"
+                    for itin in pigItins[OD]:
+                        print itin
+                print
+
+
+    def printSampleBelowRatioThreshold(self, threshold=85.0):
+        netlineItins = self.netlinePerOdItineraries
+        pigItins = self.pigPerOdItineraries
+        demand_ratio = self.addressedDemandRatio(self.totalDemand(),
+                                                 self.addressedDemand())
+        for OD in sampleAirportsInfo():
+            if demand_ratio.has_key(OD):
+                if demand_ratio[OD] < threshold:
+                    print "--- %s ---" % OD
+                    if netlineItins.has_key(OD):
+                        print "(Netline)"
+                        for itin in netlineItins[OD]:
+                            print itin
+                    if pigItins.has_key(OD):
+                        print "(PIG)"
+                        for itin in pigItins[OD]:
+                            print itin
+                    print
+
+
+    def processPIGFile(self, pigFile):
+        """Load data contained on a line matching a regular expression
+        corresponding to an itinerary found in the output from
+        PassengerItineraryGenerator program.
+        
+        The pattern to match is the following:
+        'YUL/VVR -> '
+        
+        where YUL and VVR are example of airport IATA identifiers.
+        
+        Returns a dictionnary with keys being the Origin/Destination
+        identifier and the value being a list of itineraries (string) in
+        the following format:
+        
+        >>> data = loadPIGfile(pig_file_name)
+        >>> for j in data.keys():
+        ...     print j
+        ...     for k in data[j]:
+        ...         print k
+        ...
+        
+        Otherwise stated (example):
+        
+        >>> data[ j ][0]
+        'Leg_AC_1231_YUL_3, Leg_AC_432_YVR_3'
+        >>> data[j+1][0]
+        'Leg_AC_441_YYX_1'
+        >>> data[j+2][0]
+        'Leg_AC_83_YVR_2, Leg_AC_8687_YUL_3, Leg_AC_554_YUQ_3'
+        ...
+        """
+        print '-> Loading generated itineraries found in file: ',
+        print os.path.abspath(pigFile)
+        #expression = r'Airport_[A-Z]{3}/Airport_[A-Z]{3} -> *'
+        expression = r'[A-Z]{3}/[A-Z]{3} -> *'   ## enlevé 'Airport...'
+        pattern = re.compile(expression)
+        data = {}
+        for line in fileinput.input(pigFile):
+            if pattern.match(line):
+                #print "Matched line: %s" % line
+                lineData = [i.strip() for i in line.split('->')]
+                key = lineData[0].strip()
+                remainder = lineData[1:]
+                itin = ", ".join([leg.strip() for leg in remainder])
+                if key not in data.keys():
+                    data[key] = [itin]
+                else:
+                    data[key].append(itin)
+                    X = data[key]
+                    data[key] = list(set(X))
+        print '-> Loading is completed...'
+        self.pigPerOdItineraries = data
+        print "Loaded %d itineraries" % len(data)
+
+
+    def displayServicesRatio(self):
+        print
+        print "- Service Ratio for all origin/destination values (BEGIN) -"
+        print
+        for key in sorted(self.__totalDemandStatus.keys()):
+            print "%s is served at %6.2f " % \
+                (key, float(self.__totalDemandStatus[key])) + "%"
+        print
+        print "- Service Ratio for all origin/destination values (END) -"
+        print
+        print "All average is %6.2f" % self.addressedDemandAverage() + "%"
+        print
+        print "Non-zero average is %6.2f" % self.nonZeroDemandAverage() + "%"
+        print
+
+
+    def totalDemand(self):
+        """Returns a dictionnary with airport as keys and returning a
+        float which is the total demand for the specified (O,D)."""
+        demand = {}
+        for OD in self.netlinePerOdItineraries.keys():
+            demand[OD] = 0.0
+            for itin in self.netlinePerOdItineraries[OD]:
+                demand[OD] += self.netlinePerItinDemand[itin]
+        return demand
+
+
+    def nbNetlineItineraries(self):
+        nbItins = {}
+        for OD in self.netlinePerOdItineraries.keys():
+            nbItins[OD] = len(self.netlinePerOdItineraries[OD])
+        return nbItins
+
+
+    def nbPIGItineraries(self):
+        nbItineraries = {}
+        for OD in self.pigPerOdItineraries.keys():
+            nbItineraries[OD] = len(self.pigPerOdItineraries[OD])
+        return nbItineraries
+
+
+    def addressedDemand(self):
+        services = {}
+        for OD in self.pigPerOdItineraries.keys():
+            services[OD] = 0.0
+            for itin in self.pigPerOdItineraries[OD]:
+                # If the itinerary is made of more than one leg...
+                #
+                if ',' in itin:
+                    if self.netlinePerOdItineraries.has_key(OD):
+                        _nl_itins = self.netlinePerOdItineraries[OD]
+                        for nl_itin in _nl_itins:
+                            if itin[:-1] == nl_itin[:-1]:
+                                services[OD] \
+                                    += self.netlinePerItinDemand[nl_itin]
+                # If the itinerary is made only of one leg ...
+                #
+                else:
+                    if self.netlinePerItinDemand.has_key(itin):
+                        services[OD] += self.netlinePerItinDemand[itin]
+        if DEBUG: pdb.set_trace()
+        return services
+
+
+    def addressedDemandRatio(self, netlineServices, pigServices):
+        """TO BE DOCUMENTED !!!"""
+        serviceRatios = {}
+        for OD in pigServices.keys():
+            serviceRatios[OD] = 0.0
+            if netlineServices.has_key(OD):
+                if netlineServices[OD]:
+                    serviceRatios[OD]=100.0*pigServices[OD]/netlineServices[OD]
+                else:
+                    serviceRatios[OD]=-8888.8888
+            "### OD : %s ..... demand ratio = %.2f" % (OD, serviceRatios[OD])
+        return serviceRatios
+
+
+    def printTotalDemandInfo(self):
+        """TO BE COMPLETED!"""
+        print "-- Total demand information --"
+        for key in sorted(self.__totalDemand.keys()):
+            print "Demand for %s is %6.1f" % (key, self.__totalDemand[key])
+            if self.__perOrigDestService.has_key(key):
+                ratio = self.__perOrigDestService[key] / self.__totalDemand[key]
+                self.__totalDemandStatus[key] = ratio * 100.0
+            else:
+                self.__totalDemandStatus[key] = 0
+        print
+
+
+    def buildTable(self):
+        table = []
+        #not_available = 'N/A'
+        # list of header fields
+        header = ['Orig./Dest.',
+                  '#NL Itins',
+                  'Total demand',
+                  '#PIG Itins',
+                  'Supplied demand',
+                  '  (%)']
+        widths = [1+len(field) for field in header]
+        # Building the header line with appropriate formatting
+        header_format = []
+        format = "%-*s" + "%*s" * (len(header)-1)
+        for i in range(len(header)):
+            header_format.extend( [widths[i], header[i]] )
+        header_line = format % tuple(header_format)
+        line_width = sum(widths)
+        table.append( '=' * line_width )
+        table.append(header_line)
+        table.append( '-' * line_width )
+        nb_pig_itins = self.nbPIGItineraries()
+        nb_netline_itins = self.nbNetlineItineraries()
+        total_demand = self.totalDemand()
+        addressed_demand = self.addressedDemand()
+        if DEBUG: pdb.set_trace()
+        demand_ratio = self.addressedDemandRatio(total_demand, addressed_demand)
+        #ODs = self.pigPerOdItineraries.keys()       # initialement commenté
+        ODs = self.netlinePerOdItineraries.keys()    # initialement actif
+        #ODs = sorted(total_demand.keys())           # initialement commenté
+        _totalDemand = 0.0
+        _totalServed = 0.0
+        _served_below = 0.0
+        _total_below = 0.0
+        sample_sum = 0.0
+        # Display elements in the sample definition
+        #
+        sample_airports = sampleAirportsInfo()
+        for key in sample_airports:
+            (line,
+             x, y, u, v, _dem_ratio) = self.__writeTableLine__(key,
+                                                               format,
+                                                               widths,
+                                                               nb_netline_itins,
+                                                               total_demand,
+                                                               nb_pig_itins,
+                                                               addressed_demand,
+                                                               demand_ratio)
+            sample_sum += _dem_ratio
+            table.append(line)
+        table.append('-' * line_width)            
+
+        # Display ALL the elements having itineraries
+        # data structure
+        #
+        for key in ODs:
+            (line,
+             _demand,
+             _served,
+             _below,
+             _t_below, _r) = self.__writeTableLine__(key,
+                                                     format,
+                                                     widths,
+                                                     nb_netline_itins,
+                                                     total_demand,
+                                                     nb_pig_itins,
+                                                     addressed_demand,
+                                                     demand_ratio)
+            table.append(line)
+            _totalDemand += _demand
+            _totalServed += _served
+            _served_below += _below
+            _total_below += _t_below
+
+        table.append('=' * line_width)
+        table.append('')
+        table.append("Total demand being served = %.2f" % _totalServed)
+        table.append("Total demand  (overall)   = %.2f" % _totalDemand)
+        _ratio = 100.0 * _totalServed / _totalDemand
+        table.append("Total demand RATIO = %.2f%%" % _ratio)
+        table.append('')
+        table.append("Demand being served (below 85%%) = %.2f" \
+                         % _served_below)
+        table.append("Demand overall (below 85%%)      = %.2f" \
+                         % _total_below)
+        if _total_below != 0.0 :
+            _ratio = 100.0 * _served_below / _total_below
+        else :
+            _ratio = -9.99
+        table.append("Total demand ratio of below 85%% = %.2f%%" % _ratio)
+        table.append('')
+        table.append('')
+        _average = float(sample_sum) / float(len(sample_airports))
+        table.append("sum of the sample = %.2f" % sample_sum)
+        table.append("nb of items in the sample = %f" % float(len(sample_airports)))
+        table.append("Sample addressed demand RATIO = %.2f%%" % _average)
+        table.append('')
+        table.append( '=' * line_width )
+        table.append('')
+        _ratio = 100.0 * _total_below / _totalDemand
+        table.append("Market share being below 85%% = %.2f%%" % _ratio)
+        table.append('')
+        return table
+
+
+    def __writeTableLine__(self,
+                           key,
+                           format,
+                           widths,
+                           nb_netline_itins,
+                           total_demand,
+                           nb_pig_itins,
+                           addressed_demand,
+                           demand_ratio,
+                           threshold=85.0):
+        content = []
+        _totalDemand = 0.0
+        _totalServed = 0.0
+        _served_below = 0.0
+        _total_below = 0.0
+        not_available = '0.0'
+        # Origin/Destination identifier
+        airport = 'Airport_'
+        od_key = key.replace(airport, '',
+                             key.count(airport)).replace('/',', ')
+        content.append(od_key)
+        # Number of itineraries found for this (O,D) in NetLine
+        if nb_netline_itins.has_key(key):
+            content.append( str(nb_netline_itins[key]) )
+        else: content.append(not_available)
+
+        # Total demand for this (O,D) according to NetLine
+        if total_demand.has_key(key):
+            content.append('%.1f' % total_demand[key])
+            _totalDemand += total_demand[key]
+        else: content.append(not_available)
+        
+        # Number of itineraries found for this (O,D) in PIG
+        if nb_pig_itins.has_key(key):
+            content.append( str(nb_pig_itins[key]) )
+        else: content.append(not_available)
+
+        # Addressed demand for this (O,D) according to NetLine
+        if addressed_demand.has_key(key):
+            content.append('%.1f' % addressed_demand[key] )
+            _totalServed += addressed_demand[key]
+        else: content.append(not_available)
+
+        # Demand ratio
+        _dem_ratio = 0.0
+        if demand_ratio.has_key(key):
+            _dem_ratio = demand_ratio[key]
+            content.append('%.1f' % demand_ratio[key] )
+            if demand_ratio[key] <= threshold:
+                if addressed_demand.has_key(key) and total_demand.has_key(key):
+                    _served_below += addressed_demand[key]
+                    _total_below += total_demand[key]
+        else: content.append(not_available)
+
+        string_info = []
+        for i in range(len(widths)):
+            string_info.extend( [widths[i], content[i]] )
+
+        line = format % tuple(string_info)
+
+        return (line,
+                _totalDemand,
+                _totalServed,
+                _served_below,
+                _total_below,
+                _dem_ratio,)
+
+
+    def displayTable(self, table):
+        """
+        Displays the content of the table for development purposes.
+        """
+        print
+        for line in table:
+            print line
+        print
+        print 'Table resuming the main "big errors" origin/destination and ',
+        print 'related information.'
+
+
+    def saveTable(self, table, file_name):
+        """
+        Saves the information table of all the Origin/Destination in the
+        appropriate file.
+
+        INPUT:
+        - table is a list of strings, preformatted.
+        - file_name is the name of the target file.
+
+        POST-CONDITION:
+        - the file named "file_name" is created and contains the
+        information as described in the "table".
+        """
+        f = open(file_name, "w")
+        for line in table:
+            f.write( str(line) + "\n" )
+        f.close()
+
+#
+#--- End of class DemandCompiler
+
+
+
+#----------------------------------------------------------------------
+#
+#                         Class LowerBoundsInfo
+#
+#----------------------------------------------------------------------
+
+class LowerBoundsInfo():
+    """Class loading the lower bounds information and keeping it for
+    later purposes."""
+
+    def __init__(self, fileName):
+        """
+
+        Loads the lower bounds information contained in the specified file.
+
+        This file is generated dufing a PIG execution.
+
+        POST-CONDITION:
+        - The private data member 'self.__lowerBounds' is populated with
+        information.
+
+        """
+
+        lowBoundVal = r'     Airport_[A-Z]{3}/Airport_[A-Z]{3}  : *'
+        lowBoundPattern = re.compile(lowBoundVal)
+        self.__lowerBounds = {}
+        for line in fileinput.input(fileName):
+            if lowBoundPattern.match(line):
+                content = [i.strip() for i in line.split(":")]
+                key = content[0]
+                val = content[-1]
+                self.__lowerBounds[key] = float(val)
+
+
+
+    def bounds(self):
+        """
+        Returns a copy of the lower bounds information.
+
+        The lower bounds info is a dictionnary whose keys are the
+        Origin/Destination identifiers (such as
+        'Airport_YUL/Airport_VVR', for example).
+        """
+        return self.__lowerBounds.copy()
+
+
+#--- End of class LowerBoundsInfo
+
+
+
+#----------------------------------------------------------------------
+#
+#                          Utility functions
+#
+#----------------------------------------------------------------------
+
+#
+#--- End of utility functions
+
+
+
+#----------------------------------------------------------------------
+
+if __name__ == "__main__" : 
+    # Placeholder entry point: running the module directly only prints
+    # this notice; the real call below is still disabled.
+    print "This script is not self executable yet."
+    #sys.exit( main( parseArguments(sys.argv[1:]) ) )
+
+#----------------------------------------------------------------------
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Programmer: Eric Parent
+# mailto:eric.parent@gerad.ca
+# (c) Eric Parent, 2009
+#
+
+import os
+import os.path
+import sys
+import fileinput
+import re
+import getopt
+import pdb
+import consts
+import NetLine05
+import PIG_AnalysisTools
+import utils
+
+#----------------------------------------------------------------------
+#
+#                         Module specific constants
+#
+#----------------------------------------------------------------------
+
+# Directory under which the default output files below are located.
+# NOTE(review): absolute path tied to one user's home directory.
+output_dir = '/home/parent/projet/code/pig/PassengerItineraryGenerator/results'
+# Default table file, used by parse_arguments() when --table is not given.
+table_file = os.path.join(output_dir, 'errors_table.txt')
+# NOTE(review): no function visible in this module reads this global.
+consolidatedErrors = os.path.join(output_dir,'consolidatedErrors.txt')
+
+#----------------------------------------------------------------------
+#
+#                         Module specific methods
+#
+#----------------------------------------------------------------------
+
+def usage():
+    """Describes the usage of this script, with arguments
+    descriptions."""
+    print
+    print 'Please use one of the following arguments in this manner:'
+    print
+    print ' --pig='
+    print '       The file with passenger itineraries being generated from '
+    print '       the "PassengerItineraryGenerator" (PIG) program.'
+    print
+    print ' --netline='
+    print '       The file with passenger itineraries from NetLine (2005).'
+    print
+    print ' --table='
+    print '       A name for the file which will hold the "table" information.'
+    print
+    print ' --testing'
+    print '       Allows for running the parser on a NetLine file and'
+    print '       displays itineraries on screen.'
+    print
+    print ' --help, -h'
+    print '       Displays this message.'
+    print
+    return consts.SUCCES
+
+
+def parse_arguments(inArgs):
+    """Parses the options and return the values in a dictionnary.
+
+    The fields are the following:
+
+    Returns a dictionnary with the appropriate values for options
+    (keys of the dictionnary)."""
+    options = 'help netline= pig= table= testing'
+    optionsList = options.split()
+    shortOptions = 'h'
+    try:
+        optlist, args = getopt.getopt(inArgs, shortOptions, optionsList)
+    except getopt.GetoptError, err:
+        print str(err)
+        usage()
+        sys.exit(2)
+    if not optlist:
+        usage()
+        sys.exit(2)
+
+    # Creation of dictionnary and assigning default values.
+    arguments = {}
+    arguments['netline_file'] = None
+    arguments['pig_file'] = None
+    arguments['bounds_file'] = None
+    arguments['testing'] = False
+    arguments['table_file'] = os.path.abspath(table_file)
+
+    for option, answer in optlist:
+        # help
+        if option in ("-h", "--help"):
+            sys.exit(usage())
+        # demand-shelve
+        elif option == '--netline':
+            arguments['netline_file'] = os.path.abspath(answer)
+        # pig-file
+        elif option == '--pig':
+            arguments['pig_file'] = os.path.abspath(answer)
+        elif option == '--table':
+            arguments['table_file'] = os.path.abspath(answer)
+        elif option == '--testing':
+            arguments['testing'] = True
+        else:
+            usage()
+            assert False, "unhandled option"
+    return arguments
+
+
+def load_service_ratio(fileName):
+    serviceRatio = r'Airport_[A-Z]{3}/Airport_[A-Z]{3} is served at *'
+    servicePattern = re.compile(serviceRatio)
+    serviceDict = {}
+    for line in fileinput.input(fileName):
+        if servicePattern.match(line):
+            content = [i.strip() for i in line.split(" is served at ")]
+            key = content[0]
+            val = content[-1]
+            serviceDict[key] = float(val)
+    fileinput.close()
+    return serviceDict
+
+
+def compare(file):
+    """Reads 'file' line by line, collects "key" and "value"
+    identifiers in two lists and prints how the two resulting sets
+    compare.  Always returns 0.
+
+    The differences are listed only when the two sets have different
+    sizes; the items of the larger set missing from the smaller one are
+    then printed.
+
+    NOTE(review): 'lowBoundPattern', 'pattern', 'boundKey' and
+    'boundVal' are not defined in this function nor anywhere visible in
+    this module, so the function raises a NameError on the first line
+    it reads.  They are presumably two compiled regular expressions and
+    two lists -- confirm and define them before using this function.
+    """
+    compteur = 0   # number of lines read
+    print '- Lecture du fichier %s ' % str(file)
+    for line in fileinput.input(file):
+        compteur += 1
+        # if it is information about the "key"
+        # (the second ':' separated field is kept)
+        if lowBoundPattern.match(line):
+            boundKey.append([i.strip() for i in line.split(":")][1])
+        # if it is information about the "value"
+        # (the first ':' separated field is kept)
+        elif pattern.match(line):
+            boundVal.append([i.strip() for i in line.split(":")][0])
+        # if it is overhead information...
+        else:
+            pass
+    fileinput.close()
+    print '- Lecture du fichier complétée...'
+    print '- Lu %d lignes en tout' % compteur
+    keySet = set(boundKey)
+    valSet = set(boundVal)
+    print
+    print "Cardinalité de l'ensemble des clés    : %d" % len(keySet)
+    print "Cardinalité de l'ensemble des valeurs : %d" % len(valSet)
+    # More keys than values: list the keys which have no value.
+    if len(keySet) > len(valSet) :
+        print "Une différence entre les ensembles existe..."
+        print "Différence de %d" % (len(keySet) - len(valSet))
+        print "Voici les différences:"
+        for i in (keySet - valSet):
+            print str(i)
+    # More values than keys: list the values which have no key.
+    elif len(keySet) < len(valSet) :
+        print "Une différence entre les ensembles existe..."
+        print "Différence de %d" % (len(valSet) - len(keySet))
+        print "Voici les différences:"
+        for i in (valSet - keySet):
+            print str(i)
+    return 0
+
+
+def getExecutionTime(argsDict):
+    fName = argsDict['pig_file'] = None
+    expression = '- Time required by algorithm ='
+    exec_time = "0.0"
+    for line in fileinput.input(pigFile):
+        if expression in line:
+            (head, tail) = line.split("=")
+            exec_time = tail.strip().split(" ")
+    return float(exec_time)
+
+
+def addExecutionTimeInfo(fName):
+    f = open(fName, "a+")
+    if f:
+        f.write("Execution time of algorithm = %.3f\n"
+                % getExecutionTime(argsDict))
+    f.close()
+
+
+def main(argsDict):
+    """
+    Input:
+    - args is a dictionnary with the arguments values
+    """
+
+    output_dir = os.path.join('/home/parent/projet/code/pig',
+                              '/PassengerItineraryGenerator/results')
+    
+    consolidatedErrors = os.path.join(output_dir,'consolidatedErrors.txt')
+    #
+    # 1 - Instantiate a DemandCompiler object to compile all the data.
+    #
+    print "-> Instantiation of a DemandCompiler object..."
+    demandCompiler = PIG_AnalysisTools.DemandCompiler(argsDict)
+    #
+    # 2 - Build the information table
+    #
+    print "->  Building the demands table..."
+    table = demandCompiler.buildTable()
+    #demandCompiler.displayTable(table)
+    #
+    # 3 - Save the table in a file for later references.
+    #
+    tableFile = argsDict['table_file']
+    demandCompiler.saveTable(table, tableFile)
+    #
+    # 4 - Display itineraries which are below a certain threshold
+    #
+    threshold = 85 # This value represents a demand ratio in %
+
+
+#--- End of module specific methods
+
+
+# Main execution of the script
+#
+if __name__ == "__main__" : 
+    sys.exit( main( parse_arguments(sys.argv[1:]) ) )
+

compareResults.py

+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Programmer: Eric Parent
+#
+# mailto:eric.parent@gerad.ca
+#
+# (c) Eric Parent, 2009
+#
+
+import os
+import sys
+import getopt
+import fileinput
+import utils
+import NetLine05
+import exceptions
+import consts
+import PIG_AnalysisTools
+
+
+#### Script methods ####
+
+def argumentFiles():
+    """
+
+    @brief Gets the shelve file and pig file to be processed.
+    Some routine checks about these files and returns them.
+    
+    @throw exceptions.IOError if invalid files were given.
+
+    """
+    if len(sys.argv) != 3:
+        sys.exit(usage())
+    pigFile = sys.argv[1]
+    shelveFile = sys.argv[2]
+    if not utils.isValidFile(pigFile):
+        raise exceptions.IOError, \
+            'Invalid file. Please provide with a valid file.'
+    return str(pigFile), str(shelveFile)
+
+
+def usage():
+    """
+    Describes the usage of this script, with arguments descriptions.
+    """
+    print
+    print 'Please use one of the following arguments in this manner:'
+    print
+    print ' --itin-shelve'
+    print '       Designates the itineraries shelve file.'
+    print
+    print ' --pig-file='
+    print '       Designated thePIG file with generated itineraries.'
+    print
+    print ' --display-itins, -d'
+    print '       Tells the program to show the itineraries.'
+    print
+    print ' --demand-shelve='
+    print '       Designated the total demand shelve file.'
+    print
+    print ' --help, -h'
+    print '       Displays this message.'
+    print
+    return 0
+
+
+def parseOptions(inArgs):
+    """
+    @brief Parses the options and return the values in a dictionnary.
+    The fields are the following:
+
+    'itinerariesShelveFile' the shelve file containing all the
+    itineraries from NetLine (2005)
+
+    'demandShelve' the demand shelve file (with total demand per
+    Origin/Destination)
+
+    'pigFile'           the PassengerItineraryGenerator output file
+    'showItineraries'   (boolean) telling wether to show the itineraries or not
+    'outputFile'        the file to which writing the information
+
+
+    @return a dictionnary with the appropriate values for options
+    (keys of the dictionnary).
+
+    """
+    options = 'display-itins output-file= itin-shelve=' \
+        ' pig-file= demand-shelve= help'
+    optionsList = options.split()
+    shortOptions = 'hd'
+    try:
+        optlist, args = getopt.getopt(inArgs, shortOptions, optionsList)
+    except getopt.GetoptError, err:
+        print str(err)
+        usage()
+        sys.exit(2)
+    if not optlist:
+        usage()
+        sys.exit(2)
+    arguments = {}
+    arguments['itinerariesShelveFile'] = None
+    arguments['demandShelve'] = None
+    arguments['pigFile'] = None
+    arguments['showItineraries'] = False
+    arguments['outputFile'] = None
+    for option, answer in optlist:
+        if option in ("-h", "--help"):
+            sys.exit(usage())
+        elif option in ("-d", "--display-itins" ):
+            arguments['showItineraries'] = True
+        elif option == "--itin-shelve":
+            arguments['itinerariesShelveFile'] = answer
+        elif option == '--pig-file':
+            arguments['pigFile'] = answer
+        elif option == '--output-file':
+            arguments['outputFile'] = answer
+        elif option == '--demand-shelve':
+            arguments['demandShelve'] = answer
+        else:
+            usage()
+            assert False, "unhandled option"
+    return arguments
+
+
+def main(argsDict):
+    """
+
+    @brief 
+
+    @param argsDict, a dict containing all the options (keys) and
+    values (values) for this application.
+
+    """
+    shelveFile = argsDict['itinerariesShelveFile']
+    demandShelveFile = argsDict['demandShelve']
+    pigFile = argsDict['pigFile']
+    showItineraries = argsDict['showItineraries']
+    outputFile = argsDict['outputFile']
+
+    comparator = PIG_AnalysisTools.Comparator()
+    if pigFile:
+        comparator.PIGdata = PIG_AnalysisTools.loadPIGfile(pigFile)
+    if shelveFile:
+        comparator.NLdata = utils.loadShelve(shelveFile)
+
+    # Print the itineraries that were not generated
+    #
+    NetLine = set(comparator.referenceAsList())
+    PIG = set(comparator.generatedAsList())
+    
+    # Itineraries that were not found in the
+    # reference set of itineraries
+    #
+    newItins = list(PIG - NetLine)  # difference
+    misses = list(NetLine - PIG)    # difference
+    hits = list(NetLine & PIG)      # Intersection of these two lists
+    
+    print
+    print 'Number of itineraries in reference = %d' % len(NetLine)
+    print 'Number of hits   =  %d' % len(hits)
+    print 'Number of misses =  %d' % len(misses)
+    print 'Hit ratio = %6.2f ' % (100.0 * (len(hits))/float(len(NetLine))),
+    print ' %'
+    print 'Number of itineraries not in ref = %d ' % len(newItins)
+    print 
+    if showItins:
+        if misses:
+            print '################################'
+            print '### A few itineraries missed ###'
+            for item in misses:
+                print str(item)
+            print
+            if newItins:
+                print '#######################'
+                print '### NEW ITINERARIES ###'
+                for item in newItins:
+                    print str(item)
+            print
+    return consts.SUCCES
+    
+
+
+if __name__ == "__main__" : 
+    """
+    Performs the comparison of the generated itineraries from PIG module
+    where they have been processed and saved in a "shelve".
+    The reference itineraries are found in the NetLine filel
+
+    """
+    sys.exit( main( parseArguments(sys.argv[1:]) ) )
+