smtpErrorAnalysis / smtpErrorAnalysis / findBadAddresses.py

Diff from to

smtpErrorAnalysis/findBadAddresses.py

 allow the problems to be analysed.
 
 Particular focus on emails bounced due to sender having used an invalid
-address
+address::
+
+    Usage: findBadAddresses.py [options]
+
+    findBadAddresses.py is used to parse a set of files  which represent the
+    'inbox' of an email account  and consider those email messages which are
+    'bounceback' emails sent by SMTP servers who have found it impossible to
+    deliver emails sent by the owner of the 'inbox'.   Command line options
+    specify the location of the 'inbox' and where output should be written to.
+
+    Options:
+      -h, --help            show this help message and exit
+      -i INBOX, --inbox=INBOX
+                            Location of INBOX
+      -o PATH, --outpath=PATH
+                            PATH to output csv file
+      -v, --verbose         Show each file processed
 
 '''
 import os
 import csv
 import re
 import pprint
+from optparse import OptionParser
 ERR1 = "Found zero email addresses so don't know what to do" 
 ERR2 = "Found more than one email address so don't know what to do [%s]"
 HDR_OUTPUT_COLS = [ 'HUM-READ-EMAIL-ADDR',
     else:
         return l_em_to_be_clnd[1]
 
-def parse_email_for_del_stat_part(file_name, path_em_file, csv_dict_wrtr):
+def parse_email_for_del_stat_part(file_name, path_em_file, 
+                                    csv_dict_wrtr, options):
     '''
     Given the text of a SMTP 'bounce message' writes a CSV row 
     to match the headers in the global variable HDR_OUTPUT_COLS.
     part multipart email message there might be problems
 
     '''
-    print "About to process : %s" % file_name
+    if options.verbose:
+        print "About to process : %s" % file_name
     em_file = file(path_em_file)
     em_msg = email.message_from_string(em_file.read())
     try:
     lst = []
     return lst
 
+def parse_args():
+    '''
+    Parses command line arguments using OptionParser.
+    Applies validation rules to arguments and then, if OK
+    returns them in a 'dictionary like' object ``options``
+
+    '''
+    desc = "%prog is used to parse a set of files \n" + \
+    "which represent the 'inbox' of an email account \n" + \
+    "and consider those email messages which are 'bounceback'\n" + \
+    "emails sent by SMTP servers who have found it impossible\n" + \
+    "to deliver emails sent by the owner of the 'inbox'.\n" + \
+    "\n\n" + \
+    "Command line options specify the location of the 'inbox'" + \
+    "and where output should be written to." 
+
+    usage_inner = "Usage: %s [options]"
+    usage = usage_inner % "%prog"
+
+
+    parser = OptionParser(description=desc, usage=usage)
+    parser.add_option(  "-i", "--inbox", action="store",  dest="inbox", 
+                        metavar="INBOX", help="Location of INBOX")
+    parser.add_option(  "-o", "--outpath", action="store", dest="outpath",
+                        metavar="PATH", help="PATH to output csv file")
+    parser.add_option(  "-v", "--verbose", action="store_true", 
+                        dest="verbose", help="Show each file processed")
+
+    (options, args) = parser.parse_args()
+
+    if (options.inbox is None) and (options.outpath is None):   
+        parser.print_help()
+        exit(-1)
+    elif not os.path.exists(options.inbox):
+        parser.error('inbox location does not exist')
+    elif not os.path.exists(os.path.dirname(options.outpath)):
+        parser.error('path to ouput location does not exist')
+
+    return options
+
 def main():
     '''
     The main() function
 
-    Needs work in order that the location of email files to be parsed
-    and the location of output files may be specificed via command
-    line params
+    Reads the command line options via parse_args(), opens the CSV
+    output file named by ``options.outpath`` and writes the header row
+    taken from HDR_OUTPUT_COLS. Every entry of the ``options.inbox``
+    directory which is not in the list returned by build_ignore_list()
+    is then passed to parse_email_for_del_stat_part().
+
     '''
-    lst_files_to_ignore = build_ignore_list() 
-    path = 'C:/usr/rshea/mytemp/20110609/NZLPProblemEmails-20120510/'
-    listing = os.listdir(path)
+
+    options = parse_args()
 
+    # NOTE: the output file object is not bound to a name, so it is never
+    # closed explicitly here.
     #Create a csv.DictWriter to write output to
     csv_dict_wrtr = csv.DictWriter( \
-            open('NZLP-bademailaddresses-headers-20120510.csv', 'wb'), \
+            open(options.outpath, 'wb'), \
             HDR_OUTPUT_COLS, \
             restval='N/A', \
             dialect='excel')
     #Write the initial headers
     csv_dict_wrtr.writerow(dict(zip(HDR_OUTPUT_COLS, HDR_OUTPUT_COLS)))
 
+    lst_files_to_ignore = build_ignore_list() 
+
+    listing = os.listdir(options.inbox)
+
     #Process each file in turn
     for in_file_name in listing:
         if in_file_name in lst_files_to_ignore:
             pass
         else:
-            in_file_path = "%s/%s" % (path, in_file_name)
+            in_file_path = "%s/%s" % (options.inbox, in_file_name)
             parse_email_for_del_stat_part(  in_file_name, 
                                             in_file_path, 
-                                            csv_dict_wrtr)
+                                            csv_dict_wrtr,
+                                            options)
 if __name__ == "__main__":
     main()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.